From 6bb6922de92d8413321f57f74769380c6b6f45ee Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Fri, 25 Oct 2024 11:53:47 +0200 Subject: [PATCH 001/425] [LLD][COFF] Allow overriding EC alias symbols with alternate names (#113456) --- lld/COFF/Driver.cpp | 16 ++++- lld/test/COFF/arm64ec-altnames.s | 109 +++++++++++++++++++++++++++++++ llvm/include/llvm/IR/Mangler.h | 6 ++ 3 files changed, 128 insertions(+), 3 deletions(-) create mode 100644 lld/test/COFF/arm64ec-altnames.s diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index e7f768789271fac..08c1476a595f644 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -2518,9 +2518,19 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { Symbol *sym = ctx.symtab.find(from); if (!sym) continue; - if (auto *u = dyn_cast(sym)) - if (!u->weakAlias) - u->setWeakAlias(ctx.symtab.addUndefined(to)); + if (auto *u = dyn_cast(sym)) { + if (u->weakAlias) { + // On ARM64EC, anti-dependency aliases are treated as undefined + // symbols unless a demangled symbol aliases a defined one, which is + // part of the implementation. 
+ if (!isArm64EC(ctx.config.machine) || !u->isAntiDep) + continue; + if (!isa(u->weakAlias) && + !isArm64ECMangledFunctionName(u->getName())) + continue; + } + u->setWeakAlias(ctx.symtab.addUndefined(to)); + } } // If any inputs are bitcode files, the LTO code generator may create diff --git a/lld/test/COFF/arm64ec-altnames.s b/lld/test/COFF/arm64ec-altnames.s new file mode 100644 index 000000000000000..fb28ae15895f989 --- /dev/null +++ b/lld/test/COFF/arm64ec-altnames.s @@ -0,0 +1,109 @@ +REQUIRES: aarch64 +RUN: split-file %s %t.dir && cd %t.dir + +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows ext.s -o ext.obj +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows impl.s -o impl.obj +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows impl-cpp.s -o impl-cpp.obj +RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig.obj + +# Ensure -alternatename can change a mangled function symbol aliasing a defined symbol (typically a guest exit thunk). + +RUN: lld-link -machine:arm64ec -dll -noentry -out:out1.dll ext.obj loadconfig.obj "-alternatename:#func=altsym" + +RUN: llvm-objdump -d out1.dll | FileCheck --check-prefix=DISASM %s +DISASM: 0000000180001000 <.text>: +DISASM-NEXT: 180001000: 52800020 mov w0, #0x1 // =1 +DISASM-NEXT: 180001004: d65f03c0 ret +DISASM-NOT: .thnk + +RUN: llvm-readobj --hex-dump=.test out1.dll | FileCheck --check-prefix=TESTSEC %s +TESTSEC: 0x180004000 00100000 00100000 + +# Ensure -alternatename can change a demangled function symbol aliasing an anti-dependency symbol. 
+ +RUN: lld-link -machine:arm64ec -dll -noentry -out:out2.dll ext.obj loadconfig.obj -alternatename:func=altsym + +RUN: llvm-objdump -d out2.dll | FileCheck --check-prefix=DISASM2 %s +DISASM2: Disassembly of section .text: +DISASM2-EMPTY: +DISASM2-NEXT: 0000000180001000 <.text>: +DISASM2-NEXT: 180001000: 52800020 mov w0, #0x1 // =1 +DISASM2-NEXT: 180001004: d65f03c0 ret +DISASM2-EMPTY: +DISASM2-NEXT: Disassembly of section .thnk: +DISASM2-EMPTY: +DISASM2-NEXT: 0000000180005000 <.thnk>: +DISASM2-NEXT: 180005000: 52800040 mov w0, #0x2 // =2 +DISASM2-NEXT: 180005004: d65f03c0 ret + +RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC2 %s +TESTSEC2: 0x180004000 00100000 00500000 + +# Ensure -alternatename cannot modify a demangled function symbol aliasing a defined symbol. + +RUN: lld-link -machine:arm64ec -dll -noentry -out:out3.dll impl.obj loadconfig.obj -alternatename:func=altsym +RUN: llvm-objdump -d out3.dll | FileCheck --check-prefix=DISASM %s +RUN: llvm-readobj --hex-dump=.test out3.dll | FileCheck --check-prefix=TESTSEC %s + +RUN: lld-link -machine:arm64ec -dll -noentry -out:out4.dll impl-cpp.obj loadconfig.obj -alternatename:func=altsym +RUN: llvm-objdump -d out4.dll | FileCheck --check-prefix=DISASM %s +RUN: llvm-readobj --hex-dump=.test out4.dll | FileCheck --check-prefix=TESTSEC %s + +#--- ext.s + .weak_anti_dep func +.set func, "#func" + .weak_anti_dep "#func" +.set "#func", thunksym + + .section .test, "r" + .rva func + .rva "#func" + + .section .thnk,"xr",discard,thunksym +thunksym: + mov w0, #2 + ret + + .section .text,"xr",discard,altsym + .globl altsym +altsym: + mov w0, #1 + ret + +#--- impl.s + .weak_anti_dep func +.set func, "#func" + + .section .test, "r" + .rva func + .rva "#func" + + .section .text,"xr",discard,"#func" +"#func": + mov w0, #1 + ret + + .section .text,"xr",discard,altsym + .globl altsym +altsym: + mov w0, #2 + ret + +#--- impl-cpp.s + .weak_anti_dep func +.set func, "?func@@$$hYAXXZ" + + .section .test, 
"r" + .rva func + .rva "?func@@$$hYAXXZ" + + .section .text,"xr",discard,"?func@@$$hYAXXZ" +"?func@@$$hYAXXZ": + mov w0, #1 + ret + + .section .text,"xr",discard,altsym + .globl altsym +altsym: + mov w0, #2 + ret diff --git a/llvm/include/llvm/IR/Mangler.h b/llvm/include/llvm/IR/Mangler.h index 349f9e6e7523399..3c3f0c6dce80fa8 100644 --- a/llvm/include/llvm/IR/Mangler.h +++ b/llvm/include/llvm/IR/Mangler.h @@ -61,6 +61,12 @@ std::optional getArm64ECMangledFunctionName(StringRef Name); /// mangled. std::optional getArm64ECDemangledFunctionName(StringRef Name); +/// Check if an ARM64EC function name is mangled. +bool inline isArm64ECMangledFunctionName(StringRef Name) { + return Name[0] == '#' || + (Name[0] == '?' && Name.find("$$h") != StringRef::npos); +} + } // End llvm namespace #endif From 86d65ae7949e0322f10e1856c5c33caa34ebfe2f Mon Sep 17 00:00:00 2001 From: T-Gruber <100079402+T-Gruber@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:59:16 +0200 Subject: [PATCH 002/425] [analyzer] Improve FieldRegion descriptive name (#112313) The current implementation of MemRegion::getDescriptiveName fails for FieldRegions whose SuperRegion is an ElementRegion. As outlined below: ```Cpp struct val_struct { int val; }; extern struct val_struct val_struct_array[3]; void func(){ // FieldRegion with ElementRegion as SuperRegion. val_struct_array[0].val; } ``` For this special case, the expression cannot be pretty printed and must therefore be obtained separately. 
--- clang/lib/StaticAnalyzer/Core/MemRegion.cpp | 27 +++++++++++++---- .../MemRegionDescriptiveNameTest.cpp | 29 ++++++++++++++++++- 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index 0a29a050bbc2bab..02d1358a2001ef4 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -722,6 +722,13 @@ std::string MemRegion::getDescriptiveName(bool UseQuotes) const { SmallString<50> buf; llvm::raw_svector_ostream os(buf); + // Enclose subject with single quotes if needed. + auto QuoteIfNeeded = [UseQuotes](const Twine &Subject) -> std::string { + if (UseQuotes) + return ("'" + Subject + "'").str(); + return Subject.str(); + }; + // Obtain array indices to add them to the variable name. const ElementRegion *ER = nullptr; while ((ER = R->getAs())) { @@ -751,12 +758,20 @@ std::string MemRegion::getDescriptiveName(bool UseQuotes) const { } // Get variable name. - if (R && R->canPrintPrettyAsExpr()) { - R->printPrettyAsExpr(os); - if (UseQuotes) - return (llvm::Twine("'") + os.str() + ArrayIndices + "'").str(); - else - return (llvm::Twine(os.str()) + ArrayIndices).str(); + if (R) { + // MemRegion can be pretty printed. + if (R->canPrintPrettyAsExpr()) { + R->printPrettyAsExpr(os); + return QuoteIfNeeded(llvm::Twine(os.str()) + ArrayIndices); + } + + // FieldRegion may have ElementRegion as SuperRegion. + if (const auto *FR = R->getAs()) { + std::string Super = FR->getSuperRegion()->getDescriptiveName(false); + if (Super.empty()) + return ""; + return QuoteIfNeeded(Super + "." 
+ FR->getDecl()->getName()); + } } return VariableName; diff --git a/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp b/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp index b13e7123ee524d1..0f6e49bf42f4acc 100644 --- a/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp +++ b/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp @@ -12,7 +12,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h" #include "gtest/gtest.h" -#include using namespace clang; using namespace ento; @@ -143,4 +142,32 @@ void top() { EXPECT_EQ(Output, "DescriptiveNameChecker: array[x]\n"); } +TEST(MemRegionDescriptiveNameTest, FieldRegWithSuperElementReg) { + StringRef Code = R"cpp( +void reportDescriptiveName(int *p); +struct val_struct { int val; }; +extern struct val_struct val_struct_array[3]; +void top() { + reportDescriptiveName(&val_struct_array[0].val); +})cpp"; + + std::string Output; + ASSERT_TRUE(runChecker(Code, Output)); + EXPECT_EQ(Output, "DescriptiveNameChecker: val_struct_array[0].val\n"); +} + +TEST(MemRegionDescriptiveNameTest, FieldRegWithSuperMultidimElementReg) { + StringRef Code = R"cpp( +void reportDescriptiveName(int *p); +struct val_struct { int val; }; +extern struct val_struct val_struct_array[3][4]; +void top() { + reportDescriptiveName(&val_struct_array[1][2].val); +})cpp"; + + std::string Output; + ASSERT_TRUE(runChecker(Code, Output)); + EXPECT_EQ(Output, "DescriptiveNameChecker: val_struct_array[1][2].val\n"); +} + } // namespace From d87964de78ce692fd132ea453c32e4435309a306 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Fri, 25 Oct 2024 11:30:16 +0100 Subject: [PATCH 003/425] [OpenMP][OMPIRBuilder] Error propagation across callbacks (#112533) This patch implements an approach to communicate errors between the OMPIRBuilder and its users. 
It introduces `llvm::Error` and `llvm::Expected` objects to replace the values returned by callbacks passed to `OMPIRBuilder` codegen functions. These functions then check the result for errors when callbacks are called and forward them back to the caller, which has the flexibility to recover, exit cleanly or dump a stack trace. This prevents a failed callback from leaving the IR in an invalid state while the codegen process still continues, which would trigger unrelated assertions or segmentation faults. In the case of MLIR to LLVM IR translation of the 'omp' dialect, this change results in the compiler emitting errors and exiting early instead of triggering a crash for not-yet-implemented errors. The behavior in Clang and openmp-opt stays unchanged, since callbacks will continue to always return 'success'. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 32 +- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 13 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 90 ++- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 259 +++---- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 615 +++++++++++------ llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 35 +- .../Frontend/OpenMPIRBuilderTest.cpp | 649 ++++++++++++------ .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 632 ++++++++++------- 8 files changed, 1467 insertions(+), 858 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c0931e82d9875a9..d714af035d21a2a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1192,6 +1192,7 @@ struct PushAndPopStackRAII { CodeGenFunction::JumpDest Dest = CGF.getOMPCancelDestination(OMPD_parallel); CGF.EmitBranchThroughCleanup(Dest); + return llvm::Error::success(); }; // TODO: Remove this once we emit parallel regions through the @@ -2331,8 +2332,11 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, auto *OMPRegionInfo = dyn_cast_or_null(CGF.CapturedStmtInfo); if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { - 
CGF.Builder.restoreIP(OMPBuilder.createBarrier( - CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall, + EmitChecks); + assert(AfterIP && "unexpected error creating barrier"); + CGF.Builder.restoreIP(*AfterIP); return; } @@ -5928,8 +5932,10 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); }; - OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, - IsOffloadEntry, OutlinedFn, OutlinedFnID); + llvm::Error Err = OMPBuilder.emitTargetRegionFunction( + EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn, + OutlinedFnID); + assert(!Err && "unexpected error creating target region"); if (!OutlinedFn) return; @@ -9670,9 +9676,12 @@ static void emitTargetCallKernelLaunch( NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, DynCGGroupMem, HasNoWait); - CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch( - CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID, - RTLoc, AllocaIP)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPRuntime->getOMPBuilder().emitKernelLaunch( + CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID, + RTLoc, AllocaIP); + assert(AfterIP && "unexpected error creating kernel launch"); + CGF.Builder.restoreIP(*AfterIP); }; if (RequiresOuterTask) @@ -10349,9 +10358,12 @@ void CGOpenMPRuntime::emitTargetDataCalls( InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), CGF.Builder.GetInsertPoint()); llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); - CGF.Builder.restoreIP(OMPBuilder.createTargetData( - OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, - /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createTargetData( + OmpLoc, AllocaIP, CodeGenIP, 
DeviceID, IfCondVal, Info, GenMapInfoCB, + /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc); + assert(AfterIP && "unexpected error creating target data"); + CGF.Builder.restoreIP(*AfterIP); } void CGOpenMPRuntime::emitTargetDataStandAloneCall( diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index c66d5d11b0bbfa3..598b946ad88dbbf 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1753,11 +1753,14 @@ void CGOpenMPRuntimeGPU::emitReduction( Idx++; } - CGF.Builder.restoreIP(OMPBuilder.createReductionsGPU( - OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction, - DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, - CGF.getTarget().getGridValue(), C.getLangOpts().OpenMPCUDAReductionBufNum, - RTLoc)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createReductionsGPU( + OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction, + DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, + CGF.getTarget().getGridValue(), + C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc); + assert(AfterIP && "unexpected error creating GPU reductions"); + CGF.Builder.restoreIP(*AfterIP); return; } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 0da7855ab05c6cc..1c32a675380c7f1 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1809,6 +1809,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // thus calls destructors etc. 
auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + return llvm::Error::success(); }; // Privatization callback that performs appropriate action for @@ -1831,15 +1832,18 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { InsertPointTy CodeGenIP) { OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); + return llvm::Error::success(); }; CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - Builder.restoreIP( + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, - IfCond, NumThreads, ProcBind, S.hasCancel())); + IfCond, NumThreads, ProcBind, S.hasCancel()); + assert(AfterIP && "unexpected error creating parallel"); + Builder.restoreIP(*AfterIP); return; } @@ -2128,9 +2132,13 @@ void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { RunCleanupsScope BodyScope(*this); EmitStmt(BodyStmt); + return llvm::Error::success(); }; - llvm::CanonicalLoopInfo *CL = + + llvm::Expected Result = OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal); + assert(Result && "unexpected error creating canonical loop"); + llvm::CanonicalLoopInfo *CL = *Result; // Finish up the loop. 
Builder.restoreIP(CL->getAfterIP()); @@ -4016,11 +4024,13 @@ static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF, CGM.getOpenMPRuntime().getOMPBuilder(); llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); - OMPBuilder.applyWorkshareLoop( - CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier, - SchedKind, ChunkSize, /*HasSimdModifier=*/false, - /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, - /*HasOrderedClause=*/false); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.applyWorkshareLoop( + CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, + NeedsBarrier, SchedKind, ChunkSize, /*HasSimdModifier=*/false, + /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, + /*HasOrderedClause=*/false); + assert(AfterIP && "unexpected error creating workshare loop"); return; } @@ -4257,6 +4267,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + return llvm::Error::success(); }; const CapturedStmt *ICS = S.getInnermostCapturedStmt(); @@ -4269,6 +4280,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { InsertPointTy CodeGenIP) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( *this, SubStmt, AllocaIP, CodeGenIP, "section"); + return llvm::Error::success(); }; SectionCBVector.push_back(SectionCB); } @@ -4277,6 +4289,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { InsertPointTy CodeGenIP) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); + return llvm::Error::success(); }; SectionCBVector.push_back(SectionCB); } @@ -4298,9 +4311,12 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - Builder.restoreIP(OMPBuilder.createSections( - Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), - S.getSingleClause())); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createSections(Builder, AllocaIP, SectionCBVector, PrivCB, + FiniCB, S.hasCancel(), + S.getSingleClause()); + assert(AfterIP && "unexpected error creating sections"); + Builder.restoreIP(*AfterIP); return; } { @@ -4326,17 +4342,22 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + return llvm::Error::success(); }; auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section"); + return llvm::Error::success(); }; LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); - Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createSection(Builder, BodyGenCB, FiniCB); + assert(AfterIP && "unexpected error creating section"); + Builder.restoreIP(*AfterIP); return; } @@ -4407,17 +4428,22 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + return llvm::Error::success(); }; auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); + return llvm::Error::success(); }; LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); - 
Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB); + assert(AfterIP && "unexpected error creating master"); + Builder.restoreIP(*AfterIP); return; } @@ -4453,18 +4479,22 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + return llvm::Error::success(); }; auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); + return llvm::Error::success(); }; LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); - Builder.restoreIP( - OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal); + assert(AfterIP && "unexpected error creating masked"); + Builder.restoreIP(*AfterIP); return; } @@ -4493,19 +4523,23 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + return llvm::Error::success(); }; auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); + return llvm::Error::success(); }; LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); - Builder.restoreIP(OMPBuilder.createCritical( - Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), - HintInst)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB, + S.getDirectiveName().getAsString(), HintInst); + assert(AfterIP && "unexpected error 
creating critical"); + Builder.restoreIP(*AfterIP); return; } @@ -5464,11 +5498,15 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); + return llvm::Error::success(); }; CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; if (!CapturedStmtInfo) CapturedStmtInfo = &CapStmtInfo; - Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB); + assert(AfterIP && "unexpected error creating taskgroup"); + Builder.restoreIP(*AfterIP); return; } auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -6041,6 +6079,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { auto FiniCB = [this](InsertPointTy IP) { OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + return llvm::Error::success(); }; auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, @@ -6064,11 +6103,14 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered"); } + return llvm::Error::success(); }; OMPLexicalScope Scope(*this, S, OMPD_unknown); - Builder.restoreIP( - OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C); + assert(AfterIP && "unexpected error creating ordered"); + Builder.restoreIP(*AfterIP); } return; } @@ -7344,8 +7386,10 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { if (IfCond) IfCondition = EmitScalarExpr(IfCond, /*IgnoreResultAssign=*/true); - return Builder.restoreIP( - OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + 
OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()); + assert(AfterIP && "unexpected error creating cancel"); + return Builder.restoreIP(*AfterIP); } } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index c4735ec41e71340..3afb9d84278e81a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -520,6 +520,9 @@ class OpenMPIRBuilder { /// Type used throughout for insertion points. using InsertPointTy = IRBuilder<>::InsertPoint; + /// Type used to represent an insertion point or an error value. + using InsertPointOrErrorTy = Expected; + /// Get the create a name using the platform specific separators. /// \param Parts parts of the final name that needs separation /// The created name has a first separator between the first and second part @@ -538,7 +541,7 @@ class OpenMPIRBuilder { /// A finalize callback knows about all objects that need finalization, e.g. /// destruction, when the scope of the currently generated construct is left /// at the time, and location, the callback is invoked. - using FinalizeCallbackTy = std::function; + using FinalizeCallbackTy = std::function; struct FinalizationInfo { /// The finalization callback provided by the last in-flight invocation of @@ -589,15 +592,19 @@ class OpenMPIRBuilder { /// not be split. /// \param CodeGenIP is the insertion point at which the body code should be /// placed. + /// + /// \return an error, if any were triggered during execution. using BodyGenCallbackTy = - function_ref; + function_ref; // This is created primarily for sections construct as llvm::function_ref // (BodyGenCallbackTy) is not storable (as described in the comments of // function_ref class - function_ref contains non-ownable reference // to the callable. + /// + /// \return an error, if any were triggered during execution. 
using StorableBodyGenCallbackTy = - std::function; + std::function; /// Callback type for loop body code generation. /// @@ -607,8 +614,10 @@ class OpenMPIRBuilder { /// terminated with an unconditional branch to the loop /// latch. /// \param IndVar is the induction variable usable at the insertion point. + /// + /// \return an error, if any were triggered during execution. using LoopBodyGenCallbackTy = - function_ref; + function_ref; /// Callback type for variable privatization (think copy & default /// constructor). @@ -628,7 +637,7 @@ class OpenMPIRBuilder { /// /// \returns The new insertion point where code generation continues and /// \p ReplVal the replacement value. - using PrivatizeCallbackTy = function_ref; @@ -658,9 +667,10 @@ class OpenMPIRBuilder { /// \param ThreadID Optional parameter to pass in any existing ThreadID value. /// /// \returns The insertion point after the barrier. - InsertPointTy createBarrier(const LocationDescription &Loc, - omp::Directive Kind, bool ForceSimpleCall = false, - bool CheckCancelFlag = true); + InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, + omp::Directive Kind, + bool ForceSimpleCall = false, + bool CheckCancelFlag = true); /// Generator for '#omp cancel' /// @@ -669,8 +679,9 @@ class OpenMPIRBuilder { /// \param CanceledDirective The kind of directive that is cancled. /// /// \returns The insertion point after the barrier. - InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, - omp::Directive CanceledDirective); + InsertPointOrErrorTy createCancel(const LocationDescription &Loc, + Value *IfCondition, + omp::Directive CanceledDirective); /// Generator for '#omp parallel' /// @@ -685,7 +696,7 @@ class OpenMPIRBuilder { /// \param IsCancellable Flag to indicate a cancellable parallel region. /// /// \returns The insertion position *after* the parallel. 
- IRBuilder<>::InsertPoint + InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, @@ -711,10 +722,10 @@ class OpenMPIRBuilder { /// /// \returns An object representing the created control flow structure which /// can be used for loop-associated directives. - CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, - LoopBodyGenCallbackTy BodyGenCB, - Value *TripCount, - const Twine &Name = "loop"); + Expected + createCanonicalLoop(const LocationDescription &Loc, + LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, + const Twine &Name = "loop"); /// Generator for the control flow structure of an OpenMP canonical loop. /// @@ -764,12 +775,10 @@ class OpenMPIRBuilder { /// /// \returns An object representing the created control flow structure which /// can be used for loop-associated directives. - CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc, - LoopBodyGenCallbackTy BodyGenCB, - Value *Start, Value *Stop, Value *Step, - bool IsSigned, bool InclusiveStop, - InsertPointTy ComputeIP = {}, - const Twine &Name = "loop"); + Expected createCanonicalLoop( + const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, + Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, + InsertPointTy ComputeIP = {}, const Twine &Name = "loop"); /// Collapse a loop nest into a single loop. /// @@ -996,9 +1005,10 @@ class OpenMPIRBuilder { /// the loop. /// /// \returns Point where to insert code after the workshare construct. 
- InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - bool NeedsBarrier); + InsertPointOrErrorTy applyStaticWorkshareLoop(DebugLoc DL, + CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + bool NeedsBarrier); /// Modifies the canonical loop a statically-scheduled workshare loop with a /// user-specified chunk size. @@ -1013,11 +1023,11 @@ class OpenMPIRBuilder { /// \param ChunkSize The user-specified chunk size. /// /// \returns Point where to insert code after the workshare construct. - InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL, - CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - bool NeedsBarrier, - Value *ChunkSize); + InsertPointOrErrorTy applyStaticChunkedWorkshareLoop(DebugLoc DL, + CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + bool NeedsBarrier, + Value *ChunkSize); /// Modifies the canonical loop to be a dynamically-scheduled workshare loop. /// @@ -1039,11 +1049,12 @@ class OpenMPIRBuilder { /// scheduling. If \p nullptr, defaults to 1. /// /// \returns Point where to insert code after the workshare construct. - InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, - InsertPointTy AllocaIP, - omp::OMPScheduleType SchedType, - bool NeedsBarrier, - Value *Chunk = nullptr); + InsertPointOrErrorTy applyDynamicWorkshareLoop(DebugLoc DL, + CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + omp::OMPScheduleType SchedType, + bool NeedsBarrier, + Value *Chunk = nullptr); /// Create alternative version of the loop to support if clause /// @@ -1094,7 +1105,7 @@ class OpenMPIRBuilder { /// It corresponds to type of loop workshare OpenMP pragma. /// /// \returns Point where to insert code after the workshare construct. 
- InsertPointTy applyWorkshareLoop( + InsertPointOrErrorTy applyWorkshareLoop( DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default, @@ -1251,20 +1262,21 @@ class OpenMPIRBuilder { /// cannot be resumed until execution of the structured /// block that is associated with the generated task is /// completed. - InsertPointTy createTask(const LocationDescription &Loc, - InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, - bool Tied = true, Value *Final = nullptr, - Value *IfCondition = nullptr, - SmallVector Dependencies = {}); + InsertPointOrErrorTy createTask(const LocationDescription &Loc, + InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB, bool Tied = true, + Value *Final = nullptr, + Value *IfCondition = nullptr, + SmallVector Dependencies = {}); /// Generator for the taskgroup construct /// /// \param Loc The location where the taskgroup construct was encountered. /// \param AllocaIP The insertion point to be used for alloca instructions. /// \param BodyGenCB Callback that will generate the region code. - InsertPointTy createTaskgroup(const LocationDescription &Loc, - InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB); + InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, + InsertPointTy AllocaIP, + BodyGenCallbackTy BodyGenCB); using FileIdentifierInfoCallbackTy = std::function()>; @@ -1302,15 +1314,15 @@ class OpenMPIRBuilder { /// \param CodeGenIP InsertPoint for CodeGen. /// \param LHS Pass in the LHS Value to be used for CodeGen. /// \param RHS Pass in the RHS Value to be used for CodeGen. - using ReductionGenCBTy = std::function; /// Functions used to generate atomic reductions. Such functions take two /// Values representing pointers to LHS and RHS of the reduction, as well as /// the element type of these pointers. They are expected to atomically /// update the LHS to the reduced value. 
- using ReductionGenAtomicCBTy = - std::function; + using ReductionGenAtomicCBTy = std::function; /// Enum class for reduction evaluation types scalar, complex and aggregate. enum class EvalKind { Scalar, Complex, Aggregate }; @@ -1510,9 +1522,10 @@ class OpenMPIRBuilder { /// need to be copied to the new function. /// /// \return The InterWarpCopy function. - Function *emitInterWarpCopyFunction(const LocationDescription &Loc, - ArrayRef ReductionInfos, - AttributeList FuncAttrs); + Expected + emitInterWarpCopyFunction(const LocationDescription &Loc, + ArrayRef ReductionInfos, + AttributeList FuncAttrs); /// This function emits a helper that copies all the reduction variables from /// the team into the provided global buffer for the reduction variables. @@ -1604,7 +1617,7 @@ class OpenMPIRBuilder { /// need to be copied to the new function. /// /// \return The reduction function. - Function *createReductionFunction( + Expected createReductionFunction( StringRef ReducerName, ArrayRef ReductionInfos, ReductionGenCBKind ReductionGenCBKind = ReductionGenCBKind::MLIR, AttributeList FuncAttrs = {}); @@ -1871,7 +1884,7 @@ class OpenMPIRBuilder { /// \param ReductionBufNum Optional OpenMPCUDAReductionBufNumValue to be /// used for teams reduction. /// \param SrcLocInfo Source location information global. - InsertPointTy createReductionsGPU( + InsertPointOrErrorTy createReductionsGPU( const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef ReductionInfos, bool IsNoWait = false, bool IsTeamsReduction = false, @@ -1943,10 +1956,11 @@ class OpenMPIRBuilder { /// \param IsNoWait A flag set if the reduction is marked as nowait. /// \param IsByRef A flag set if the reduction is using reference /// or direct value. 
- InsertPointTy createReductions(const LocationDescription &Loc, - InsertPointTy AllocaIP, - ArrayRef ReductionInfos, - ArrayRef IsByRef, bool IsNoWait = false); + InsertPointOrErrorTy createReductions(const LocationDescription &Loc, + InsertPointTy AllocaIP, + ArrayRef ReductionInfos, + ArrayRef IsByRef, + bool IsNoWait = false); ///} @@ -2002,9 +2016,11 @@ class OpenMPIRBuilder { /// \param CancelFlag Flag indicating if the cancellation is performed. /// \param CanceledDirective The kind of directive that is cancled. /// \param ExitCB Extra code to be generated in the exit block. - void emitCancelationCheckImpl(Value *CancelFlag, - omp::Directive CanceledDirective, - FinalizeCallbackTy ExitCB = {}); + /// + /// \return an error, if any were triggered during execution. + Error emitCancelationCheckImpl(Value *CancelFlag, + omp::Directive CanceledDirective, + FinalizeCallbackTy ExitCB = {}); /// Generate a target region entry call. /// @@ -2135,8 +2151,10 @@ class OpenMPIRBuilder { /// } else { /// ElseGen(); /// } - void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, - BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {}); + /// + /// \return an error, if any were triggered during execution. + Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, + BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {}); /// Create the global variable holding the offload mappings information. GlobalVariable *createOffloadMaptypes(SmallVectorImpl &Mappings, @@ -2340,7 +2358,8 @@ class OpenMPIRBuilder { /// is executed when the kernel launch fails. It takes an insertion point as /// parameter where the code should be emitted. It returns an insertion point /// that points right after after the emitted code. - using EmitFallbackCallbackTy = function_ref; + using EmitFallbackCallbackTy = + function_ref; /// Generate a target region entry call and host fallback call. 
/// @@ -2352,7 +2371,7 @@ class OpenMPIRBuilder { /// \param DeviceID Identifier for the device via the 'device' clause. /// \param RTLoc Source location identifier /// \param AllocaIP The insertion point to be used for alloca instructions. - InsertPointTy + InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, @@ -2366,9 +2385,11 @@ class OpenMPIRBuilder { /// \param RTLoc Source location identifier /// \Param TargetTaskAllocaIP Insertion point for the alloca block of the /// generated task. + /// + /// \return an error, if any were triggered during execution. using TargetTaskBodyCallbackTy = - function_ref; + function_ref; /// Generate a target-task for the target construct /// @@ -2380,7 +2401,7 @@ class OpenMPIRBuilder { /// dependencies as specified by the 'depend' clause. /// \param HasNoWait True if the target construct had 'nowait' on it, false /// otherwise - InsertPointTy emitTargetTask( + InsertPointOrErrorTy emitTargetTask( TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector &Dependencies, @@ -2478,11 +2499,11 @@ class OpenMPIRBuilder { /// \param CPFuncs copy functions to use for each copyprivate variable. /// /// \returns The insertion position *after* the single call. - InsertPointTy createSingle(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, bool IsNowait, - ArrayRef CPVars = {}, - ArrayRef CPFuncs = {}); + InsertPointOrErrorTy createSingle(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, bool IsNowait, + ArrayRef CPVars = {}, + ArrayRef CPFuncs = {}); /// Generator for '#omp master' /// @@ -2491,9 +2512,9 @@ class OpenMPIRBuilder { /// \param FiniCB Callback to finalize variable copies. /// /// \returns The insertion position *after* the master. 
- InsertPointTy createMaster(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB); + InsertPointOrErrorTy createMaster(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB); /// Generator for '#omp masked' /// @@ -2502,9 +2523,9 @@ class OpenMPIRBuilder { /// \param FiniCB Callback to finialize variable copies. /// /// \returns The insertion position *after* the masked. - InsertPointTy createMasked(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, Value *Filter); + InsertPointOrErrorTy createMasked(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, Value *Filter); /// Generator for '#omp critical' /// @@ -2515,10 +2536,10 @@ class OpenMPIRBuilder { /// \param HintInst Hint Instruction for hint clause associated with critical /// /// \returns The insertion position *after* the critical. - InsertPointTy createCritical(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, - StringRef CriticalName, Value *HintInst); + InsertPointOrErrorTy createCritical(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, + StringRef CriticalName, Value *HintInst); /// Generator for '#omp ordered depend (source | sink)' /// @@ -2544,10 +2565,10 @@ class OpenMPIRBuilder { /// otherwise, with simd clause; /// /// \returns The insertion position *after* the ordered. 
- InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB, - bool IsThreads); + InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB, + bool IsThreads); /// Generator for '#omp sections' /// @@ -2560,12 +2581,11 @@ class OpenMPIRBuilder { /// \param IsNowait If true, barrier - to ensure all sections are executed /// before moving forward will not be generated. /// \returns The insertion position *after* the sections. - InsertPointTy createSections(const LocationDescription &Loc, - InsertPointTy AllocaIP, - ArrayRef SectionCBs, - PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, bool IsCancellable, - bool IsNowait); + InsertPointOrErrorTy + createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, + ArrayRef SectionCBs, + PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, + bool IsCancellable, bool IsNowait); /// Generator for '#omp section' /// @@ -2573,9 +2593,9 @@ class OpenMPIRBuilder { /// \param BodyGenCB Callback that will generate the region body code. /// \param FiniCB Callback to finalize variable copies. /// \returns The insertion position *after* the section. - InsertPointTy createSection(const LocationDescription &Loc, - BodyGenCallbackTy BodyGenCB, - FinalizeCallbackTy FiniCB); + InsertPointOrErrorTy createSection(const LocationDescription &Loc, + BodyGenCallbackTy BodyGenCB, + FinalizeCallbackTy FiniCB); /// Generator for `#omp teams` /// @@ -2589,7 +2609,7 @@ class OpenMPIRBuilder { /// contention group created by each team. /// \param IfExpr is the integer argument value of the if condition on the /// teams clause. 
- InsertPointTy + InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr, Value *ThreadLimit = nullptr, Value *IfExpr = nullptr); @@ -2764,7 +2784,8 @@ class OpenMPIRBuilder { public: /// Functions used to generate a function with the given name. - using FunctionGenCallback = std::function; + using FunctionGenCallback = + std::function(StringRef FunctionName)>; /// Create a unique name for the entry function using the source location /// information of the current target region. The name will be something like: @@ -2797,10 +2818,10 @@ class OpenMPIRBuilder { /// \param GenerateFunctionCallback The callback function to generate the code /// \param OutlinedFunction Pointer to the outlined function /// \param EntryFnIDName Name of the ID o be created - void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, - FunctionGenCallback &GenerateFunctionCallback, - bool IsOffloadEntry, Function *&OutlinedFn, - Constant *&OutlinedFnID); + Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, + FunctionGenCallback &GenerateFunctionCallback, + bool IsOffloadEntry, Function *&OutlinedFn, + Constant *&OutlinedFnID); /// Registers the given function and sets up the attribtues of the function /// Returns the FunctionID. @@ -2851,22 +2872,22 @@ class OpenMPIRBuilder { /// use_device_ptr and use_device_addr. /// \param CustomMapperCB Optional callback to generate code related to /// custom mappers. 
- OpenMPIRBuilder::InsertPointTy createTargetData( + InsertPointOrErrorTy createTargetData( const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc = nullptr, - function_ref + function_ref BodyGenCB = nullptr, function_ref DeviceAddrCB = nullptr, function_ref CustomMapperCB = nullptr, Value *SrcLocInfo = nullptr); - using TargetBodyGenCallbackTy = function_ref; - using TargetGenArgAccessorsCallbackTy = function_ref; @@ -2887,7 +2908,7 @@ class OpenMPIRBuilder { /// \param Dependencies A vector of DependData objects that carry // dependency information as passed in the depend clause // \param HasNowait Whether the target construct has a `nowait` clause or not. - InsertPointTy createTarget( + InsertPointOrErrorTy createTarget( const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, @@ -2969,10 +2990,10 @@ class OpenMPIRBuilder { /// should be called. /// /// \return The insertion position in exit block - InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD, - InsertPointTy FinIP, - Instruction *ExitCall, - bool HasFinalize = true); + InsertPointOrErrorTy emitCommonDirectiveExit(omp::Directive OMPD, + InsertPointTy FinIP, + Instruction *ExitCall, + bool HasFinalize = true); /// Common Interface to generate OMP inlined regions /// @@ -2990,8 +3011,7 @@ class OpenMPIRBuilder { /// \param IsCancellable if HasFinalize is set to true, indicate if the /// the directive should be cancellable. /// \return The insertion point after the region - - InsertPointTy + InsertPointOrErrorTy EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional = false, @@ -3027,7 +3047,7 @@ class OpenMPIRBuilder { /// /// \returns Value to update X to. 
using AtomicUpdateCallbackTy = - const function_ref &IRB)>; + const function_ref(Value *XOld, IRBuilder<> &IRB)>; private: enum AtomicKind { Read, Write, Update, Capture, Compare }; @@ -3066,7 +3086,7 @@ class OpenMPIRBuilder { /// /// \returns A pair of the old value of X before the update, and the value /// used for the update. - std::pair + Expected> emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, @@ -3143,12 +3163,11 @@ class OpenMPIRBuilder { /// (e.g. true for X = X BinOp Expr) /// /// \return Insertion point after generated atomic update IR. - InsertPointTy createAtomicUpdate(const LocationDescription &Loc, - InsertPointTy AllocaIP, AtomicOpValue &X, - Value *Expr, AtomicOrdering AO, - AtomicRMWInst::BinOp RMWOp, - AtomicUpdateCallbackTy &UpdateOp, - bool IsXBinopExpr); + InsertPointOrErrorTy + createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, + AtomicOpValue &X, Value *Expr, AtomicOrdering AO, + AtomicRMWInst::BinOp RMWOp, + AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr); /// Emit atomic update for constructs: --- Only Scalar data types /// V = X; X = X BinOp Expr , @@ -3179,7 +3198,7 @@ class OpenMPIRBuilder { /// 'v', not an updated one. /// /// \return Insertion point after generated atomic capture IR. 
- InsertPointTy + InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index be93f9f2e1fdc8b..d2e4dc1c85dfd2d 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -945,7 +945,7 @@ Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { "omp_global_thread_num"); } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive Kind, bool ForceSimpleCall, bool CheckCancelFlag) { if (!updateToLocation(Loc)) @@ -992,12 +992,13 @@ OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive Kind, Args); if (UseCancelBarrier && CheckCancelFlag) - emitCancelationCheckImpl(Result, OMPD_parallel); + if (Error Err = emitCancelationCheckImpl(Result, OMPD_parallel)) + return Err; return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective) { @@ -1029,18 +1030,22 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc, Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); - auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) { + auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) -> Error { if (CanceledDirective == OMPD_parallel) { IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(IP); - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); + return createBarrier(LocationDescription(Builder.saveIP(), 
Loc.DL), + omp::Directive::OMPD_unknown, + /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false) + .takeError(); } + return Error::success(); }; // The actual cancel logic is shared with others, e.g., cancel_barriers. - emitCancelationCheckImpl(Result, CanceledDirective, ExitCB); + if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB)) + return Err; // Update the insertion point and remove the terminator we introduced. Builder.SetInsertPoint(UI->getParent()); @@ -1079,7 +1084,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel( return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch( const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP) { @@ -1134,15 +1139,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch( auto CurFn = Builder.GetInsertBlock()->getParent(); emitBlock(OffloadFailedBlock, CurFn); - Builder.restoreIP(EmitTargetCallFallbackCB(Builder.saveIP())); + InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP()); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); emitBranch(OffloadContBlock); emitBlock(OffloadContBlock, CurFn, /*IsFinished=*/true); return Builder.saveIP(); } -void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, - omp::Directive CanceledDirective, - FinalizeCallbackTy ExitCB) { +Error OpenMPIRBuilder::emitCancelationCheckImpl( + Value *CancelFlag, omp::Directive CanceledDirective, + FinalizeCallbackTy ExitCB) { assert(isLastFinalizationInfoCancellable(CanceledDirective) && "Unexpected cancellation!"); @@ -1171,12 +1179,15 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag, // post finalization block that is known to the FiniCB callback. 
Builder.SetInsertPoint(CancellationBlock); if (ExitCB) - ExitCB(Builder.saveIP()); + if (Error Err = ExitCB(Builder.saveIP())) + return Err; auto &FI = FinalizationStack.back(); - FI.FiniCB(Builder.saveIP()); + if (Error Err = FI.FiniCB(Builder.saveIP())) + return Err; // The continuation block is where code generation continues. Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin()); + return Error::success(); } // Callback used to create OpenMP runtime calls to support @@ -1355,7 +1366,7 @@ hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, } } -IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( const LocationDescription &Loc, InsertPointTy OuterAllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, @@ -1496,7 +1507,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( // Let the caller create the body. 
assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); - BodyGenCB(InnerAllocaIP, CodeGenIP); + if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP)) + return Err; LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); @@ -1565,10 +1577,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( FunctionCallee TIDRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); - auto PrivHelper = [&](Value &V) { + auto PrivHelper = [&](Value &V) -> Error { if (&V == TIDAddr || &V == ZeroAddr) { OI.ExcludeArgsFromAggregate.push_back(&V); - return; + return Error::success(); } SetVector Uses; @@ -1608,8 +1620,11 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) { ReplacementValue = PrivTID; } else { - Builder.restoreIP( - PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue)); + InsertPointOrErrorTy AfterIP = + PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); InnerAllocaIP = { InnerAllocaIP.getBlock(), InnerAllocaIP.getBlock()->getTerminator()->getIterator()}; @@ -1617,11 +1632,13 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( assert(ReplacementValue && "Expected copy/create callback to set replacement value!"); if (ReplacementValue == &V) - return; + return Error::success(); } for (Use *UPtr : Uses) UPtr->set(ReplacementValue); + + return Error::success(); }; // Reset the inner alloca insertion as it will be used for loading the values @@ -1640,7 +1657,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( for (Value *Input : Inputs) { LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n"); - PrivHelper(*Input); + if (Error Err = PrivHelper(*Input)) + return Err; } LLVM_DEBUG({ for (Value *Output : Outputs) @@ -1666,7 +1684,8 @@ IRBuilder<>::InsertPoint 
OpenMPIRBuilder::createParallel( Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator(); InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); - FiniCB(PreFiniIP); + if (Error Err = FiniCB(PreFiniIP)) + return Err; // Register the outlined info. addOutlineInfo(std::move(OI)); @@ -1797,7 +1816,7 @@ static Value *emitTaskDependencies( return DepArray; } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, @@ -1833,7 +1852,8 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, InsertPointTy TaskAllocaIP = InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); - BodyGenCB(TaskAllocaIP, TaskBodyIP); + if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP)) + return Err; OutlineInfo OI; OI.EntryBB = TaskAllocaBB; @@ -2048,7 +2068,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB) { @@ -2066,7 +2086,8 @@ OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, Builder.CreateCall(TaskgroupFn, {Ident, ThreadID}); BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit"); - BodyGenCB(AllocaIP, Builder.saveIP()); + if (Error Err = BodyGenCB(AllocaIP, Builder.saveIP())) + return Err; Builder.SetInsertPoint(TaskgroupExitBB); // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup @@ -2077,7 +2098,7 @@ OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( const 
LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) { @@ -2124,7 +2145,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( // ... // section_loop.after: // ; - auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) { + auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) -> Error { Builder.restoreIP(CodeGenIP); BasicBlock *Continue = splitBBWithSuffix(Builder, /*CreateBranch=*/false, ".sections.after"); @@ -2138,12 +2159,14 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); Builder.SetInsertPoint(CaseBB); BranchInst *CaseEndBr = Builder.CreateBr(Continue); - SectionCB(InsertPointTy(), - {CaseEndBr->getParent(), CaseEndBr->getIterator()}); + if (Error Err = SectionCB(InsertPointTy(), {CaseEndBr->getParent(), + CaseEndBr->getIterator()})) + return Err; CaseNumber++; } // remove the existing terminator from body BB since there can be no // terminators after switch/case + return Error::success(); }; // Loop body ends here // LowerBound, UpperBound, and STride for createCanonicalLoop @@ -2151,10 +2174,16 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( Value *LB = ConstantInt::get(I32Ty, 0); Value *UB = ConstantInt::get(I32Ty, SectionCBs.size()); Value *ST = ConstantInt::get(I32Ty, 1); - llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop( + Expected LoopInfo = createCanonicalLoop( Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop"); - InsertPointTy AfterIP = - applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait); + if (!LoopInfo) + return LoopInfo.takeError(); + + InsertPointOrErrorTy WsloopIP = + applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP, !IsNowait); + if (!WsloopIP) + return WsloopIP.takeError(); + InsertPointTy AfterIP = *WsloopIP; // Apply the finalization callback in LoopAfterBB auto 
FiniInfo = FinalizationStack.pop_back_val(); @@ -2164,14 +2193,15 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections( Builder.restoreIP(AfterIP); BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini"); - CB(Builder.saveIP()); + if (Error Err = CB(Builder.saveIP())) + return Err; AfterIP = {FiniBB, FiniBB->begin()}; } return AfterIP; } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB) { @@ -2502,7 +2532,7 @@ void OpenMPIRBuilder::emitReductionListCopy( } } -Function *OpenMPIRBuilder::emitInterWarpCopyFunction( +Expected OpenMPIRBuilder::emitInterWarpCopyFunction( const LocationDescription &Loc, ArrayRef ReductionInfos, AttributeList FuncAttrs) { InsertPointTy SavedIP = Builder.saveIP(); @@ -2621,10 +2651,13 @@ Function *OpenMPIRBuilder::emitInterWarpCopyFunction( } // kmpc_barrier. - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, - /* ForceSimpleCall */ false, - /* CheckCancelFlag */ true); + InsertPointOrErrorTy BarrierIP1 = + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, + /* ForceSimpleCall */ false, + /* CheckCancelFlag */ true); + if (!BarrierIP1) + return BarrierIP1.takeError(); BasicBlock *ThenBB = BasicBlock::Create(Ctx, "then"); BasicBlock *ElseBB = BasicBlock::Create(Ctx, "else"); BasicBlock *MergeBB = BasicBlock::Create(Ctx, "ifcont"); @@ -2666,10 +2699,13 @@ Function *OpenMPIRBuilder::emitInterWarpCopyFunction( // endif emitBlock(MergeBB, Builder.GetInsertBlock()->getParent()); - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, - /* ForceSimpleCall */ false, - /* CheckCancelFlag */ true); + InsertPointOrErrorTy BarrierIP2 = + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, + /* 
ForceSimpleCall */ false, + /* CheckCancelFlag */ true); + if (!BarrierIP2) + return BarrierIP2.takeError(); // Warp 0 copies reduce element from transfer medium BasicBlock *W0ThenBB = BasicBlock::Create(Ctx, "then"); @@ -3286,7 +3322,7 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const { return (Name + Suffix).str(); } -Function *OpenMPIRBuilder::createReductionFunction( +Expected OpenMPIRBuilder::createReductionFunction( StringRef ReducerName, ArrayRef ReductionInfos, ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) { auto *FuncTy = FunctionType::get(Builder.getVoidTy(), @@ -3352,7 +3388,10 @@ Function *OpenMPIRBuilder::createReductionFunction( Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr); Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr); Value *Reduced; - RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced); + InsertPointOrErrorTy AfterIP = + RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced); + if (!AfterIP) + return AfterIP.takeError(); if (!Builder.GetInsertBlock()) return ReductionFunc; Builder.CreateStore(Reduced, LHSPtr); @@ -3405,7 +3444,7 @@ checkReductionInfos(ArrayRef ReductionInfos, } } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductionsGPU( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef ReductionInfos, bool IsNoWait, bool IsTeamsReduction, bool HasDistribute, @@ -3435,11 +3474,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductionsGPU( AttrBldr.removeAttribute(Attribute::OptimizeNone); FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr); - Function *ReductionFunc = nullptr; CodeGenIP = Builder.saveIP(); - ReductionFunc = + Expected ReductionResult = createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, ReductionGenCBKind, FuncAttrs); + if (!ReductionResult) + return ReductionResult.takeError(); + Function 
*ReductionFunc = *ReductionResult; Builder.restoreIP(CodeGenIP); // Set the grid value in the config needed for lowering later on @@ -3480,7 +3521,11 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductionsGPU( CodeGenIP = Builder.saveIP(); Function *SarFunc = emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs); - Function *WcFunc = emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs); + Expected CopyResult = + emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs); + if (!CopyResult) + return CopyResult.takeError(); + Function *WcFunc = *CopyResult; Builder.restoreIP(CodeGenIP); Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy); @@ -3595,7 +3640,7 @@ static Function *getFreshReductionFunc(Module &M) { ".omp.reduction.func", &M); } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef ReductionInfos, @@ -3688,7 +3733,7 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, Type *ValueType = RI.ElementType; // We have one less load for by-ref case because that load is now inside of // the reduction region - Value *RedValue = nullptr; + Value *RedValue = RI.Variable; if (!IsByRef[En.index()]) { RedValue = Builder.CreateLoad(ValueType, RI.Variable, "red.value." + Twine(En.index())); @@ -3697,13 +3742,12 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, Builder.CreateLoad(ValueType, RI.PrivateVariable, "red.private.value." 
+ Twine(En.index())); Value *Reduced; - if (IsByRef[En.index()]) { - Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RI.Variable, - PrivateRedValue, Reduced)); - } else { - Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RedValue, - PrivateRedValue, Reduced)); - } + InsertPointOrErrorTy AfterIP = + RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + if (!Builder.GetInsertBlock()) return InsertPointTy(); // for by-ref case, the load is inside of the reduction region @@ -3722,8 +3766,11 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, Builder.SetInsertPoint(AtomicRedBlock); if (CanGenerateAtomic && llvm::none_of(IsByRef, [](bool P) { return P; })) { for (const ReductionInfo &RI : ReductionInfos) { - Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType, - RI.Variable, RI.PrivateVariable)); + InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen( + Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); if (!Builder.GetInsertBlock()) return InsertPointTy(); } @@ -3755,7 +3802,11 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType()); Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr); Value *Reduced; - Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced)); + InsertPointOrErrorTy AfterIP = + RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); if (!Builder.GetInsertBlock()) return InsertPointTy(); // store is inside of the reduction region when using by-ref @@ -3768,11 +3819,10 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB) { - if (!updateToLocation(Loc)) return Loc.IP; @@ -3793,7 +3843,7 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc, /*Conditional*/ true, /*hasFinalize*/ true); } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter) { @@ -3884,7 +3934,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton( return CL; } -CanonicalLoopInfo * +Expected OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name) { @@ -3906,7 +3956,8 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, // Emit the body content. We do it after connecting the loop to the CFG to // avoid that the callback encounters degenerate BBs. - BodyGenCB(CL->getBodyIP(), CL->getIndVar()); + if (Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar())) + return Err; #ifndef NDEBUG CL->assertOK(); @@ -3914,7 +3965,7 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc, return CL; } -CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( +Expected OpenMPIRBuilder::createCanonicalLoop( const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name) { @@ -3979,7 +4030,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop( Builder.restoreIP(CodeGenIP); Value *Span = Builder.CreateMul(IV, Step); Value *IndVar = Builder.CreateAdd(Span, Start); - BodyGenCB(Builder.saveIP(), IndVar); + return BodyGenCB(Builder.saveIP(), IndVar); }; LocationDescription LoopLoc = ComputeIP.isSet() ? 
Loc.IP : Builder.saveIP(); return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name); @@ -4001,7 +4052,7 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, llvm_unreachable("unknown OpenMP loop iterator bitwidth"); } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier) { @@ -4078,10 +4129,14 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum}); // Add the barrier if requested. - if (NeedsBarrier) - createBarrier(LocationDescription(Builder.saveIP(), DL), - omp::Directive::OMPD_for, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); + if (NeedsBarrier) { + InsertPointOrErrorTy BarrierIP = + createBarrier(LocationDescription(Builder.saveIP(), DL), + omp::Directive::OMPD_for, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + if (!BarrierIP) + return BarrierIP.takeError(); + } InsertPointTy AfterIP = CLI->getAfterIP(); CLI->invalidate(); @@ -4089,9 +4144,12 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, return AfterIP; } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( - DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - bool NeedsBarrier, Value *ChunkSize) { +OpenMPIRBuilder::InsertPointOrErrorTy +OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(DebugLoc DL, + CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + bool NeedsBarrier, + Value *ChunkSize) { assert(CLI->isValid() && "Requires a valid canonical loop"); assert(ChunkSize && "Chunk size is required"); @@ -4167,12 +4225,23 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( // Create outer "dispatch" loop for enumerating the chunks. 
BasicBlock *DispatchEnter = splitBB(Builder, true); Value *DispatchCounter; - CanonicalLoopInfo *DispatchCLI = createCanonicalLoop( + Expected LoopResult = createCanonicalLoop( {Builder.saveIP(), DL}, - [&](InsertPointTy BodyIP, Value *Counter) { DispatchCounter = Counter; }, + [&](InsertPointTy BodyIP, Value *Counter) { + DispatchCounter = Counter; + return Error::success(); + }, FirstChunkStart, CastedTripCount, NextChunkStride, /*IsSigned=*/false, /*InclusiveStop=*/false, /*ComputeIP=*/{}, "dispatch"); + if (!LoopResult) { + // It is safe to assume this didn't return an error because the callback + // passed into createCanonicalLoop is the only possible error source, and it + // always returns success. Need to still cast the result into bool to avoid + // runtime errors. + llvm_unreachable("unexpected error creating canonical loop"); + } + CanonicalLoopInfo *DispatchCLI = *LoopResult; // Remember the BasicBlocks of the dispatch loop we need, then invalidate to // not have to preserve the canonical invariant. @@ -4219,9 +4288,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum}); // Add the barrier if requested. 
- if (NeedsBarrier) - createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for, - /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false); + if (NeedsBarrier) { + InsertPointOrErrorTy AfterIP = + createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for, + /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false); + if (!AfterIP) + return AfterIP.takeError(); + } #ifndef NDEBUG // Even though we currently do not support applying additional methods to it, @@ -4229,7 +4302,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop( CLI->assertOK(); #endif - return {DispatchAfter, DispatchAfter->getFirstInsertionPt()}; + return InsertPointTy(DispatchAfter, DispatchAfter->getFirstInsertionPt()); } // Returns an LLVM function to call for executing an OpenMP static worksharing @@ -4462,7 +4535,7 @@ OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI, return CLI->getAfterIP(); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop( DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier, @@ -4563,9 +4636,11 @@ getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) { llvm_unreachable("unknown OpenMP loop iterator bitwidth"); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( - DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, - OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) { +OpenMPIRBuilder::InsertPointOrErrorTy +OpenMPIRBuilder::applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, + InsertPointTy AllocaIP, + OMPScheduleType SchedType, + bool NeedsBarrier, Value *Chunk) { assert(CLI->isValid() && "Requires a valid canonical loop"); assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) && "Require dedicated allocate 
IP"); @@ -4681,9 +4756,12 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( // Add the barrier if requested. if (NeedsBarrier) { Builder.SetInsertPoint(&Exit->back()); - createBarrier(LocationDescription(Builder.saveIP(), DL), - omp::Directive::OMPD_for, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); + InsertPointOrErrorTy BarrierIP = + createBarrier(LocationDescription(Builder.saveIP(), DL), + omp::Directive::OMPD_for, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + if (!BarrierIP) + return BarrierIP.takeError(); } CLI->invalidate(); @@ -5542,7 +5620,7 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef CPVars, ArrayRef CPFuncs) { @@ -5571,14 +5649,17 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single); Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args); - auto FiniCBWrapper = [&](InsertPointTy IP) { - FiniCB(IP); + auto FiniCBWrapper = [&](InsertPointTy IP) -> Error { + if (Error Err = FiniCB(IP)) + return Err; // The thread that executes the single region must set `DidIt` to 1. // This is used by __kmpc_copyprivate, to know if the caller is the // single thread or not. 
if (DidIt) Builder.CreateStore(Builder.getInt32(1), DidIt); + + return Error::success(); }; // generates the following: @@ -5589,9 +5670,12 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( // __kmpc_copyprivate // __kmpc_barrier - EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper, - /*Conditional*/ true, - /*hasFinalize*/ true); + InsertPointOrErrorTy AfterIP = + EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper, + /*Conditional*/ true, + /*hasFinalize*/ true); + if (!AfterIP) + return AfterIP.takeError(); if (DidIt) { for (size_t I = 0, E = CPVars.size(); I < E; ++I) @@ -5600,14 +5684,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle( /*BufSize=*/ConstantInt::get(Int64, 0), CPVars[I], CPFuncs[I], DidIt); // NOTE __kmpc_copyprivate already inserts a barrier - } else if (!IsNowait) - createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), - omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, - /* CheckCancelFlag */ false); + } else if (!IsNowait) { + InsertPointOrErrorTy AfterIP = + createBarrier(LocationDescription(Builder.saveIP(), Loc.DL), + omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false, + /* CheckCancelFlag */ false); + if (!AfterIP) + return AfterIP.takeError(); + } return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) { @@ -5688,7 +5776,7 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd( const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads) { if (!updateToLocation(Loc)) @@ 
-5717,7 +5805,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd( /*Conditional*/ false, /*hasFinalize*/ true); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion( Directive OMPD, Instruction *EntryCall, Instruction *ExitCall, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional, bool HasFinalize, bool IsCancellable) { @@ -5739,15 +5827,19 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion( emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); // generate body - BodyGenCB(/* AllocaIP */ InsertPointTy(), - /* CodeGenIP */ Builder.saveIP()); + if (Error Err = BodyGenCB(/* AllocaIP */ InsertPointTy(), + /* CodeGenIP */ Builder.saveIP())) + return Err; // emit exit call and do any needed finalization. auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt()); assert(FiniBB->getTerminator()->getNumSuccessors() == 1 && FiniBB->getTerminator()->getSuccessor(0) == ExitBB && "Unexpected control flow graph state!!"); - emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + InsertPointOrErrorTy AfterIP = + emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize); + if (!AfterIP) + return AfterIP.takeError(); assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB && "Unexpected Control Flow State!"); MergeBlockIntoPredecessor(FiniBB); @@ -5796,7 +5888,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry( return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt()); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit( omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall, bool HasFinalize) { @@ -5810,7 +5902,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit( FinalizationInfo Fi = 
FinalizationStack.pop_back_val(); assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!"); - Fi.FiniCB(FinIP); + if (Error Err = Fi.FiniCB(FinIP)) + return Err; BasicBlock *FiniBB = FinIP.getBlock(); Instruction *FiniBBTI = FiniBB->getTerminator(); @@ -6319,7 +6412,7 @@ Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn, Constant::getNullValue(Builder.getInt8Ty()), EntryFnName); } -void OpenMPIRBuilder::emitTargetRegionFunction( +Error OpenMPIRBuilder::emitTargetRegionFunction( TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID) { @@ -6327,15 +6420,20 @@ void OpenMPIRBuilder::emitTargetRegionFunction( SmallString<64> EntryFnName; OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo); - OutlinedFn = Config.isTargetDevice() || !Config.openMPOffloadMandatory() - ? GenerateFunctionCallback(EntryFnName) - : nullptr; + if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) { + Expected CBResult = GenerateFunctionCallback(EntryFnName); + if (!CBResult) + return CBResult.takeError(); + OutlinedFn = *CBResult; + } else { + OutlinedFn = nullptr; + } // If this target outline function is not an offload entry, we don't need to // register it. This may be in the case of a false if clause, or if there are // no OpenMP targets. 
if (!IsOffloadEntry) - return; + return Error::success(); std::string EntryFnIDName = Config.isTargetDevice() @@ -6344,6 +6442,7 @@ void OpenMPIRBuilder::emitTargetRegionFunction( OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName); + return Error::success(); } Constant *OpenMPIRBuilder::registerTargetRegionFunction( @@ -6359,12 +6458,13 @@ Constant *OpenMPIRBuilder::registerTargetRegionFunction( return OutlinedFnID; } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc, - function_ref + function_ref BodyGenCB, function_ref DeviceAddrCB, function_ref CustomMapperCB, Value *SrcLocInfo) { @@ -6374,8 +6474,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( Builder.restoreIP(CodeGenIP); // Disable TargetData CodeGen on Device pass. if (Config.IsTargetDevice.value_or(false)) { - if (BodyGenCB) - Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv)); + if (BodyGenCB) { + InsertPointOrErrorTy AfterIP = + BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + } return Builder.saveIP(); } @@ -6384,7 +6489,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. 
- auto BeginThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BeginThenGen = [&](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) -> Error { MapInfo = &GenMapInfoCB(Builder.saveIP()); emitOffloadingArrays(AllocaIP, Builder.saveIP(), *MapInfo, Info, /*IsNonContiguous=*/true, DeviceAddrCB, @@ -6413,7 +6519,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( if (IsStandAlone) { assert(MapperFunc && "MapperFunc missing for standalone target data"); - auto TaskBodyCB = [&](Value *, Value *, IRBuilderBase::InsertPoint) { + auto TaskBodyCB = [&](Value *, Value *, + IRBuilderBase::InsertPoint) -> Error { if (Info.HasNoWait) { OffloadingArgs.append({llvm::Constant::getNullValue(Int32), llvm::Constant::getNullValue(VoidPtr), @@ -6431,16 +6538,20 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( emitBlock(OffloadContBlock, CurFn, /*IsFinished=*/true); Builder.restoreIP(Builder.saveIP()); } + return Error::success(); }; bool RequiresOuterTargetTask = Info.HasNoWait; - - if (!RequiresOuterTargetTask) - TaskBodyCB(/*DeviceID=*/nullptr, /*RTLoc=*/nullptr, - /*TargetTaskAllocaIP=*/{}); - else - emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP, - /*Dependencies=*/{}, Info.HasNoWait); + if (!RequiresOuterTargetTask) { + Error Err = TaskBodyCB(/*DeviceID=*/nullptr, /*RTLoc=*/nullptr, + /*TargetTaskAllocaIP=*/{}); + assert(!Err && "TaskBodyCB expected to succeed"); + } else { + InsertPointOrErrorTy AfterIP = + emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP, + /*Dependencies=*/{}, Info.HasNoWait); + assert(AfterIP && "TaskBodyCB expected to succeed"); + } } else { Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr( omp::OMPRTL___tgt_target_data_begin_mapper); @@ -6458,15 +6569,26 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( // If device pointer privatization is required, emit the body of the // region here. It will have to be duplicated: with and without // privatization. 
- Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::Priv)); + InsertPointOrErrorTy AfterIP = + BodyGenCB(Builder.saveIP(), BodyGenTy::Priv); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); } + return Error::success(); }; // If we need device pointer privatization, we need to emit the body of the // region with no privatization in the 'else' branch of the conditional. // Otherwise, we don't have to do anything. - auto BeginElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv)); + auto BeginElseGen = [&](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP) -> Error { + InsertPointOrErrorTy AfterIP = + BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + return Error::success(); }; // Generate code for the closing of the data region. @@ -6494,35 +6616,45 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper); Builder.CreateCall(EndMapperFunc, OffloadingArgs); + return Error::success(); }; // We don't have to do anything to close the region if the if clause evaluates // to false. - auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + return Error::success(); + }; - if (BodyGenCB) { - if (IfCond) { - emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP); - } else { - BeginThenGen(AllocaIP, Builder.saveIP()); + Error Err = [&]() -> Error { + if (BodyGenCB) { + Error Err = [&]() { + if (IfCond) + return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP); + return BeginThenGen(AllocaIP, Builder.saveIP()); + }(); + + if (Err) + return Err; + + // If we don't require privatization of device pointers, we emit the body + // in between the runtime calls. This avoids duplicating the body code. 
+ InsertPointOrErrorTy AfterIP = + BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); + + if (IfCond) + return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP); + return EndThenGen(AllocaIP, Builder.saveIP()); } + if (IfCond) + return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP); + return BeginThenGen(AllocaIP, Builder.saveIP()); + }(); - // If we don't require privatization of device pointers, we emit the body in - // between the runtime calls. This avoids duplicating the body code. - Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv)); - - if (IfCond) { - emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP); - } else { - EndThenGen(AllocaIP, Builder.saveIP()); - } - } else { - if (IfCond) { - emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP); - } else { - BeginThenGen(AllocaIP, Builder.saveIP()); - } - } + if (Err) + return Err; return Builder.saveIP(); } @@ -6591,7 +6723,7 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() { return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit); } -static Function *createOutlinedFunction( +static Expected createOutlinedFunction( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName, SmallVectorImpl &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, @@ -6671,7 +6803,11 @@ static Function *createOutlinedFunction( OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func); // Insert target deinit call in the device compilation pass. 
- Builder.restoreIP(CBFunc(Builder.saveIP(), Builder.saveIP())); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + CBFunc(Builder.saveIP(), Builder.saveIP()); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); if (OMPBuilder.Config.isTargetDevice()) OMPBuilder.createTargetDeinit(Builder); @@ -6726,8 +6862,11 @@ static Function *createOutlinedFunction( Argument &Arg = std::get<1>(InArg); Value *InputCopy = nullptr; - Builder.restoreIP( - ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP())); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP()); + if (!AfterIP) + return AfterIP.takeError(); + Builder.restoreIP(*AfterIP); // In certain cases a Global may be set up for replacement, however, this // Global may be used in multiple arguments to the kernel, just segmented @@ -6847,7 +6986,8 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, Builder.CreateRetVoid(); return ProxyFn; } -static void emitTargetOutlinedFunction( + +static Error emitTargetOutlinedFunction( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl &Inputs, @@ -6861,11 +7001,12 @@ static void emitTargetOutlinedFunction( CBFunc, ArgAccessorFuncCB); }; - OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, - IsOffloadEntry, OutlinedFn, OutlinedFnID); + return OMPBuilder.emitTargetRegionFunction( + EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn, + OutlinedFnID); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector &Dependencies, @@ -6983,7 +7124,8 @@ OpenMPIRBuilder::InsertPointTy 
OpenMPIRBuilder::emitTargetTask( Builder.restoreIP(TargetTaskBodyIP); - TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP); + if (Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP)) + return Err; OI.ExitBB = Builder.saveIP().getBlock(); OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, HasNoWait, @@ -7161,8 +7303,8 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, // Generate a function call to the host fallback implementation of the target // region. This is called by the host when no offload entry was generated for // the target region and when the offloading call fails at runtime. - auto &&EmitTargetCallFallbackCB = - [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy { + auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP) + -> OpenMPIRBuilder::InsertPointOrErrorTy { Builder.restoreIP(IP); Builder.CreateCall(OutlinedFn, Args); return Builder.saveIP(); @@ -7173,9 +7315,10 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::TargetKernelArgs KArgs; - auto TaskBodyCB = [&](Value *DeviceID, Value *RTLoc, - IRBuilderBase::InsertPoint TargetTaskAllocaIP) { - if (OutlinedFnID) { + auto TaskBodyCB = + [&](Value *DeviceID, Value *RTLoc, + IRBuilderBase::InsertPoint TargetTaskAllocaIP) -> Error { + llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { // emitKernelLaunch makes the necessary runtime call to offload the // kernel. We then outline all that code into a separate function // ('kernel_launch_function' in the pseudo code above). This function is @@ -7183,31 +7326,41 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, // '@.omp_target_task_proxy_func' in the pseudo code above) // "@.omp_target_task_proxy_func' is generated by // emitTargetTaskProxyFunction. 
- Builder.restoreIP(OMPBuilder.emitKernelLaunch( - Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID, - RTLoc, TargetTaskAllocaIP)); - } else { - // When OutlinedFnID is set to nullptr, then it's not an offloading - // call. In this case, we execute the host implementation directly. - OMPBuilder.Builder.restoreIP( - EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP())); - } + if (OutlinedFnID) + return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, + EmitTargetCallFallbackCB, KArgs, + DeviceID, RTLoc, TargetTaskAllocaIP); + // When OutlinedFnID is set to nullptr, then it's not an offloading call. + // In this case, we execute the host implementation directly. + return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP()); + }(); + + if (!AfterIP) + return AfterIP.takeError(); + + OMPBuilder.Builder.restoreIP(*AfterIP); + return Error::success(); }; // If we don't have an ID for the target region, it means an offload entry // wasn't created. In this case we just run the host fallback directly. if (!OutlinedFnID) { - if (RequiresOuterTargetTask) { - // Arguments that are intended to be directly forwarded to an - // emitKernelLaunch call are pased as nullptr, since OutlinedFnID=nullptr - // results in that call not being done. - Builder.restoreIP(OMPBuilder.emitTargetTask(TaskBodyCB, - /*DeviceID=*/nullptr, - /*RTLoc=*/nullptr, AllocaIP, - Dependencies, HasNoWait)); - } else { - Builder.restoreIP(EmitTargetCallFallbackCB(Builder.saveIP())); - } + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { + if (RequiresOuterTargetTask) { + // Arguments that are intended to be directly forwarded to an + // emitKernelLaunch call are pased as nullptr, since + // OutlinedFnID=nullptr results in that call not being done. 
+ return OMPBuilder.emitTargetTask(TaskBodyCB, /*DeviceID=*/nullptr, + /*RTLoc=*/nullptr, AllocaIP, + Dependencies, HasNoWait); + } + return EmitTargetCallFallbackCB(Builder.saveIP()); + }(); + + // Assume no error was returned because EmitTargetCallFallbackCB doesn't + // produce any. The 'if' check enables accessing the returned value. + if (AfterIP) + Builder.restoreIP(*AfterIP); return; } @@ -7247,17 +7400,24 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, // The presence of certain clauses on the target directive require the // explicit generation of the target task. - if (RequiresOuterTargetTask) { - Builder.restoreIP(OMPBuilder.emitTargetTask( - TaskBodyCB, DeviceID, RTLoc, AllocaIP, Dependencies, HasNoWait)); - } else { - Builder.restoreIP(OMPBuilder.emitKernelLaunch( - Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID, RTLoc, - AllocaIP)); - } + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() { + if (RequiresOuterTargetTask) + return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP, + Dependencies, HasNoWait); + + return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, + EmitTargetCallFallbackCB, KArgs, + DeviceID, RTLoc, AllocaIP); + }(); + + // Assume no error was returned because TaskBodyCB and + // EmitTargetCallFallbackCB don't produce any. The 'if' check enables + // accessing the returned value. + if (AfterIP) + Builder.restoreIP(*AfterIP); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, ArrayRef NumTeams, ArrayRef NumThreads, @@ -7276,9 +7436,10 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget( // The target region is outlined into its own function. 
The LLVM IR for // the target region itself is generated using the callbacks CBFunc // and ArgAccessorFuncCB - emitTargetOutlinedFunction(*this, Builder, IsOffloadEntry, EntryInfo, - OutlinedFn, OutlinedFnID, Args, CBFunc, - ArgAccessorFuncCB); + if (Error Err = emitTargetOutlinedFunction( + *this, Builder, IsOffloadEntry, EntryInfo, OutlinedFn, OutlinedFnID, + Args, CBFunc, ArgAccessorFuncCB)) + return Err; // If we are not on the target device, then we need to generate code // to make a remote call (offload) to the previously outlined function @@ -7767,18 +7928,17 @@ void OpenMPIRBuilder::emitBlock(BasicBlock *BB, Function *CurFn, Builder.SetInsertPoint(BB); } -void OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, - BodyGenCallbackTy ElseGen, - InsertPointTy AllocaIP) { +Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, + BodyGenCallbackTy ElseGen, + InsertPointTy AllocaIP) { // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. if (auto *CI = dyn_cast(Cond)) { auto CondConstant = CI->getSExtValue(); if (CondConstant) - ThenGen(AllocaIP, Builder.saveIP()); - else - ElseGen(AllocaIP, Builder.saveIP()); - return; + return ThenGen(AllocaIP, Builder.saveIP()); + + return ElseGen(AllocaIP, Builder.saveIP()); } Function *CurFn = Builder.GetInsertBlock()->getParent(); @@ -7791,16 +7951,19 @@ void OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, Builder.CreateCondBr(Cond, ThenBlock, ElseBlock); // Emit the 'then' code. emitBlock(ThenBlock, CurFn); - ThenGen(AllocaIP, Builder.saveIP()); + if (Error Err = ThenGen(AllocaIP, Builder.saveIP())) + return Err; emitBranch(ContBlock); // Emit the 'else' code if present. // There is no need to emit line number for unconditional branch. 
emitBlock(ElseBlock, CurFn); - ElseGen(AllocaIP, Builder.saveIP()); + if (Error Err = ElseGen(AllocaIP, Builder.saveIP())) + return Err; // There is no need to emit line number for unconditional branch. emitBranch(ContBlock); // Emit the continuation block for code after the if. emitBlock(ContBlock, CurFn, /*IsFinished=*/true); + return Error::success(); } bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic( @@ -7948,7 +8111,7 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate( const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) { @@ -7969,8 +8132,11 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate( "OpenMP atomic does not support LT or GT operations"); }); - emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, - X.IsVolatile, IsXBinopExpr); + Expected> AtomicResult = + emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, + X.IsVolatile, IsXBinopExpr); + if (!AtomicResult) + return AtomicResult.takeError(); checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update); return Builder.saveIP(); } @@ -8010,7 +8176,7 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2, llvm_unreachable("Unsupported atomic update operation"); } -std::pair OpenMPIRBuilder::emitAtomicUpdate( +Expected> OpenMPIRBuilder::emitAtomicUpdate( InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) { @@ -8072,7 +8238,10 @@ std::pair OpenMPIRBuilder::emitAtomicUpdate( llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2); PHI->addIncoming(AtomicLoadRes.first, CurBB); Value 
*OldExprVal = PHI; - Value *Upd = UpdateOp(OldExprVal, Builder); + Expected CBResult = UpdateOp(OldExprVal, Builder); + if (!CBResult) + return CBResult.takeError(); + Value *Upd = *CBResult; Builder.CreateStore(Upd, NewAtomicAddr); AtomicOrdering Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO); @@ -8129,7 +8298,10 @@ std::pair OpenMPIRBuilder::emitAtomicUpdate( } } - Value *Upd = UpdateOp(OldExprVal, Builder); + Expected CBResult = UpdateOp(OldExprVal, Builder); + if (!CBResult) + return CBResult.takeError(); + Value *Upd = *CBResult; Builder.CreateStore(Upd, NewAtomicAddr); LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr); AtomicOrdering Failure = @@ -8158,7 +8330,7 @@ std::pair OpenMPIRBuilder::emitAtomicUpdate( return Res; } -OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture( const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, @@ -8181,11 +8353,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture( // If UpdateExpr is 'x' updated with some `expr` not based on 'x', // 'x' is simply atomically rewritten with 'expr'. AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg); - std::pair Result = + Expected> AtomicResult = emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile, IsXBinopExpr); - - Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second); + if (!AtomicResult) + return AtomicResult.takeError(); + Value *CapturedVal = + (IsPostfixUpdate ? 
AtomicResult->first : AtomicResult->second); Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile); checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture); @@ -8380,7 +8554,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare( return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointTy +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower, Value *NumTeamsUpper, Value *ThreadLimit, @@ -8463,7 +8637,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, // Generate the body of teams. InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); - BodyGenCB(AllocaIP, CodeGenIP); + if (Error Err = BodyGenCB(AllocaIP, CodeGenIP)) + return Err; OutlineInfo OI; OI.EntryBB = AllocaBB; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 35664a5c7a2ac27..9e25620710fc84a 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1093,6 +1093,7 @@ struct OpenMPOpt { CGStartBB->getTerminator()->setSuccessor(0, StartBB); assert(EndBB != nullptr && "EndBB should not be null"); EndBB->getTerminator()->setSuccessor(0, CGEndBB); + return Error::success(); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, @@ -1101,7 +1102,7 @@ struct OpenMPOpt { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) {}; + auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; /// Create a sequential execution region within a merged parallel region, /// encapsulated in a master construct with a barrier for synchronization. 
@@ -1132,8 +1133,9 @@ struct OpenMPOpt { CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); + return Error::success(); }; - auto FiniCB = [&](InsertPointTy CodeGenIP) {}; + auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; // Find outputs from the sequential region to outside users and // broadcast their values to them. @@ -1176,12 +1178,15 @@ struct OpenMPOpt { OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); - InsertPointTy SeqAfterIP = + OpenMPIRBuilder::InsertPointOrErrorTy SeqAfterIP = OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); + assert(SeqAfterIP && "Unexpected error creating master"); - OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); + OpenMPIRBuilder::InsertPointOrErrorTy BarrierAfterIP = + OMPInfoCache.OMPBuilder.createBarrier(*SeqAfterIP, OMPD_parallel); + assert(BarrierAfterIP && "Unexpected error creating barrier"); - BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); + BranchInst::Create(SeqAfterBB, SeqAfterIP->getBlock()); LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn << "\n"); @@ -1251,10 +1256,12 @@ struct OpenMPOpt { OriginalFn->getEntryBlock().getFirstInsertionPt()); // Create the merged parallel region with default proc binding, to // avoid overriding binding settings, and without explicit cancellation. 
- InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, - OMP_PROC_BIND_default, /* IsCancellable */ false); - BranchInst::Create(AfterBB, AfterIP.getBlock()); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPInfoCache.OMPBuilder.createParallel( + Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, + OMP_PROC_BIND_default, /* IsCancellable */ false); + assert(AfterIP && "Unexpected error creating parallel"); + BranchInst::Create(AfterBB, AfterIP->getBlock()); // Perform the actual outlining. OMPInfoCache.OMPBuilder.finalize(OriginalFn); @@ -1290,10 +1297,12 @@ struct OpenMPOpt { if (CI != MergableCIs.back()) { // TODO: Remove barrier if the merged parallel region includes the // 'nowait' clause. - OMPInfoCache.OMPBuilder.createBarrier( - InsertPointTy(NewCI->getParent(), - NewCI->getNextNode()->getIterator()), - OMPD_parallel); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPInfoCache.OMPBuilder.createBarrier( + InsertPointTy(NewCI->getParent(), + NewCI->getNextNode()->getIterator()), + OMPD_parallel); + assert(AfterIP && "Unexpected error creating barrier"); } CI->eraseFromParent(); diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index fe04cbbce12dcd4..630cd03c688012c 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -27,6 +27,20 @@ using namespace llvm; using namespace omp; +// Wrapper lambdas to allow using EXPECT*() macros inside of error-returning +// callbacks. +#define FINICB_WRAPPER(cb) \ + [&cb](InsertPointTy IP) -> Error { \ + cb(IP); \ + return Error::success(); \ + } + +#define BODYGENCB_WRAPPER(cb) \ + [&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error { \ + cb(AllocaIP, CodeGenIP); \ + return Error::success(); \ + } + namespace { /// Create an instruction that uses the values in \p Values. 
We use "printf" @@ -218,9 +232,13 @@ class OpenMPIRBuilderTest : public testing::Test { CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC}); if (Call) *Call = CallInst; + + return Error::success(); }; - CanonicalLoopInfo *Loop = + Expected LoopResult = OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *Loop = *LoopResult; // Finalize the function. Builder.restoreIP(Loop->getAfterIP()); @@ -327,14 +345,18 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) { IRBuilder<> Builder(BB); - OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for); + OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP1 = + OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for); + assert(BarrierIP1 && "unexpected error"); EXPECT_TRUE(M->global_empty()); EXPECT_EQ(M->size(), 1U); EXPECT_EQ(F->size(), 1U); EXPECT_EQ(BB->size(), 0U); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - OMPBuilder.createBarrier(Loc, OMPD_for); + OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP2 = + OMPBuilder.createBarrier(Loc, OMPD_for); + assert(BarrierIP2 && "unexpected error"); EXPECT_FALSE(M->global_empty()); EXPECT_EQ(M->size(), 3U); EXPECT_EQ(F->size(), 1U); @@ -372,13 +394,15 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); BranchInst::Create(CBB, IP.getBlock()); }; - OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); + OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel); - Builder.restoreIP(NewIP); + OpenMPIRBuilder::InsertPointOrErrorTy NewIP = + OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel); + assert(NewIP && "unexpected error"); + Builder.restoreIP(*NewIP); EXPECT_FALSE(M->global_empty()); EXPECT_EQ(M->size(), 4U); EXPECT_EQ(F->size(), 4U); @@ -400,7 
+424,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) { EXPECT_EQ(Cancel->getNumUses(), 1U); Instruction *CancelBBTI = Cancel->getParent()->getTerminator(); EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); - EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock()); + EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP->getBlock()); EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); EXPECT_NE(GTID1, nullptr); @@ -439,13 +463,15 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); BranchInst::Create(CBB, IP.getBlock()); }; - OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); + OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel); - Builder.restoreIP(NewIP); + OpenMPIRBuilder::InsertPointOrErrorTy NewIP = + OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel); + assert(NewIP && "unexpected error"); + Builder.restoreIP(*NewIP); EXPECT_FALSE(M->global_empty()); EXPECT_EQ(M->size(), 4U); EXPECT_EQ(F->size(), 7U); @@ -473,7 +499,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) { EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U); EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U); EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(), - NewIP.getBlock()); + NewIP->getBlock()); EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U); CallInst *GTID1 = dyn_cast(&CancelBBTI->getSuccessor(1)->front()); EXPECT_NE(GTID1, nullptr); @@ -512,13 +538,15 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { ASSERT_EQ(IP.getBlock()->end(), IP.getPoint()); BranchInst::Create(CBB, IP.getBlock()); }; - OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true}); + OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true}); IRBuilder<> Builder(BB); 
OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()}); - auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for); - Builder.restoreIP(NewIP); + OpenMPIRBuilder::InsertPointOrErrorTy NewIP = + OMPBuilder.createBarrier(Loc, OMPD_for); + assert(NewIP && "unexpected error"); + Builder.restoreIP(*NewIP); EXPECT_FALSE(M->global_empty()); EXPECT_EQ(M->size(), 3U); EXPECT_EQ(F->size(), 4U); @@ -540,7 +568,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) { EXPECT_EQ(Barrier->getNumUses(), 1U); Instruction *BarrierBBTI = Barrier->getParent()->getTerminator(); EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U); - EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock()); + EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP->getBlock()); EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U); EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(), 1U); @@ -563,7 +591,9 @@ TEST_F(OpenMPIRBuilderTest, DbgLoc) { IRBuilder<> Builder(BB); OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - OMPBuilder.createBarrier(Loc, OMPD_for); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createBarrier(Loc, OMPD_for); + assert(AfterIP && "unexpected error"); CallInst *GTID = dyn_cast(&BB->front()); CallInst *Barrier = dyn_cast(GTID->getNextNode()); EXPECT_EQ(GTID->getDebugLoc(), DL); @@ -627,6 +657,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), &ThenTerm, &ElseTerm); + return Error::success(); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, @@ -654,19 +685,23 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { + ++NumFinalizationPoints; + return Error::success(); + }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - 
IRBuilder<>::InsertPoint AfterIP = + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + assert(AfterIP && "unexpected error"); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); EXPECT_EQ(NumFinalizationPoints, 1U); - Builder.restoreIP(AfterIP); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -735,6 +770,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), &ThenTerm, &ElseTerm); + return Error::success(); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, @@ -762,18 +798,22 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { + ++NumFinalizationPoints; + return Error::success(); + }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - IRBuilder<>::InsertPoint AfterIP = + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + assert(AfterIP && "unexpected error"); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); EXPECT_EQ(NumFinalizationPoints, 1U); - Builder.restoreIP(AfterIP); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -826,6 +866,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumInnerBodiesGenerated; + return Error::success(); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, @@ -841,7 +882,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { 
++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { + ++NumFinalizationPoints; + return Error::success(); + }; auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; @@ -849,27 +893,29 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { BasicBlock *CGBB = CodeGenIP.getBlock(); BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint()); CGBB->getTerminator()->eraseFromParent(); - ; - IRBuilder<>::InsertPoint AfterIP = OMPBuilder.createParallel( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel( InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + assert(AfterIP && "unexpected error"); - Builder.restoreIP(AfterIP); + Builder.restoreIP(*AfterIP); Builder.CreateBr(NewBB); + return Error::success(); }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - IRBuilder<>::InsertPoint AfterIP = + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + assert(AfterIP && "unexpected error"); EXPECT_EQ(NumInnerBodiesGenerated, 1U); EXPECT_EQ(NumOuterBodiesGenerated, 1U); EXPECT_EQ(NumFinalizationPoints, 2U); - Builder.restoreIP(AfterIP); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -920,6 +966,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumInnerBodiesGenerated; + return Error::success(); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, @@ -935,7 +982,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { + ++NumFinalizationPoints; + return Error::success(); + 
}; auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { ++NumOuterBodiesGenerated; @@ -948,32 +998,36 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { NewBB1->getTerminator()->eraseFromParent(); ; - IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.createParallel( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP1 = OMPBuilder.createParallel( InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + assert(AfterIP1 && "unexpected error"); - Builder.restoreIP(AfterIP1); + Builder.restoreIP(*AfterIP1); Builder.CreateBr(NewBB1); - IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.createParallel( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP2 = OMPBuilder.createParallel( InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + assert(AfterIP2 && "unexpected error"); - Builder.restoreIP(AfterIP2); + Builder.restoreIP(*AfterIP2); Builder.CreateBr(NewBB2); + return Error::success(); }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - IRBuilder<>::InsertPoint AfterIP = + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); + assert(AfterIP && "unexpected error"); EXPECT_EQ(NumInnerBodiesGenerated, 2U); EXPECT_EQ(NumOuterBodiesGenerated, 1U); EXPECT_EQ(NumFinalizationPoints, 3U); - Builder.restoreIP(AfterIP); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -1043,6 +1097,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm, &ElseTerm); + return Error::success(); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, @@ -1073,20 +1128,22 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { auto 
FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; // No destructors. + return Error::success(); }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - IRBuilder<>::InsertPoint AfterIP = + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()), nullptr, OMP_PROC_BIND_default, false); + assert(AfterIP && "unexpected error"); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); EXPECT_EQ(NumFinalizationPoints, 1U); - Builder.restoreIP(AfterIP); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -1141,8 +1198,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { // Create three barriers, two cancel barriers but only one checked. Function *CBFn, *BFn; - Builder.restoreIP( - OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel)); + OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP1 = + OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel); + assert(BarrierIP1 && "unexpected error"); + Builder.restoreIP(*BarrierIP1); CBFn = M->getFunction("__kmpc_cancel_barrier"); BFn = M->getFunction("__kmpc_barrier"); @@ -1153,8 +1212,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U); CheckedBarrier = cast(CBFn->user_back()); - Builder.restoreIP( - OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true)); + OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP2 = + OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true); + assert(BarrierIP2 && "unexpected error"); + Builder.restoreIP(*BarrierIP2); CBFn = M->getFunction("__kmpc_cancel_barrier"); BFn = M->getFunction("__kmpc_barrier"); ASSERT_NE(CBFn, nullptr); @@ -1164,8 +1225,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { ASSERT_TRUE(isa(BFn->user_back())); ASSERT_EQ(BFn->user_back()->getNumUses(), 0U); - 
Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, - false, false)); + OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP3 = + OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, false, false); + assert(BarrierIP3 && "unexpected error"); + Builder.restoreIP(*BarrierIP3); ASSERT_EQ(CBFn->getNumUses(), 2U); ASSERT_EQ(BFn->getNumUses(), 1U); ASSERT_TRUE(CBFn->user_back() != CheckedBarrier); @@ -1190,21 +1253,23 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { Builder.restoreIP(IP); Builder.CreateCall(FakeDestructor, {Builder.getInt32(NumFinalizationPoints)}); + return Error::success(); }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - IRBuilder<>::InsertPoint AfterIP = - OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, - Builder.CreateIsNotNull(F->arg_begin()), - nullptr, OMP_PROC_BIND_default, true); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel( + Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB, FiniCB, + Builder.CreateIsNotNull(F->arg_begin()), nullptr, OMP_PROC_BIND_default, + true); + assert(AfterIP && "unexpected error"); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 0U); EXPECT_EQ(NumFinalizationPoints, 2U); EXPECT_EQ(FakeDestructor->getNumUses(), 2U); - Builder.restoreIP(AfterIP); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -1269,20 +1334,22 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { Builder.CreateCall(TakeI32PtrFunc, I32PtrVal); Builder.CreateCall(TakeStructFunc, StructVal); Builder.CreateCall(TakeStructPtrFunc, StructPtrVal); + return Error::success(); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, Value &Inner, Value *&ReplacementValue) { ReplacementValue = &Inner; return CodeGenIP; }; - auto FiniCB = [](InsertPointTy) {}; + auto FiniCB = [](InsertPointTy) { return Error::success(); }; IRBuilder<>::InsertPoint 
AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - IRBuilder<>::InsertPoint AfterIP = + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false); - Builder.restoreIP(AfterIP); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -1312,10 +1379,13 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) { Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), &ThenTerm, &ElseTerm); + return Error::success(); }; - CanonicalLoopInfo *Loop = + Expected LoopResult = OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *Loop = *LoopResult; Builder.restoreIP(Loop->getAfterIP()); ReturnInst *RetInst = Builder.CreateRetVoid(); @@ -1367,10 +1437,14 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) { Value *StartVal = ConstantInt::get(LCTy, Start); Value *StopVal = ConstantInt::get(LCTy, Stop); Value *StepVal = ConstantInt::get(LCTy, Step); - auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {}; - CanonicalLoopInfo *Loop = + auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { + return Error::success(); + }; + Expected LoopResult = OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, StepVal, IsSigned, InclusiveStop); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *Loop = *LoopResult; Loop->assertOK(); Builder.restoreIP(Loop->getAfterIP()); Value *TripCount = Loop->getTripCount(); @@ -1463,16 +1537,22 @@ TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) { Value *InnerLC) { Builder.restoreIP(InnerCodeGenIP); Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC}); + return Error::success(); }; - InnerLoop = OMPBuilder.createCanonicalLoop( + Expected LoopResult = 
OMPBuilder.createCanonicalLoop( Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner"); + assert(LoopResult && "unexpected error"); + InnerLoop = *LoopResult; Builder.restoreIP(InnerLoop->getAfterIP()); InbetweenTrail = createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC}); + return Error::success(); }; - CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer"); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *OuterLoop = *LoopResult; // Finish the function. Builder.restoreIP(OuterLoop->getAfterIP()); @@ -1582,12 +1662,18 @@ TEST_F(OpenMPIRBuilderTest, TileNestedLoops) { // Add something that consumes the induction variables to the body. createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC}); + return Error::success(); }; - InnerLoop = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner"); + assert(LoopResult && "unexpected error"); + InnerLoop = *LoopResult; + return Error::success(); }; - CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( Loc, OuterLoopBodyGenCB, TripCount, "outer"); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *OuterLoop = *LoopResult; // Finalize the function. Builder.restoreIP(OuterLoop->getAfterIP()); @@ -1682,14 +1768,20 @@ TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) { // Add something that consumes the induction variable to the body. 
Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC}); + return Error::success(); }; - InnerLoop = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal, InnerStep, false, false, ComputeIP, "inner"); + assert(LoopResult && "unexpected error"); + InnerLoop = *LoopResult; + return Error::success(); }; - CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false, false, ComputeIP, "outer"); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *OuterLoop = *LoopResult; // Finalize the function Builder.restoreIP(OuterLoop->getAfterIP()); @@ -1793,10 +1885,14 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) { Value *StepVal = ConstantInt::get(LCTy, Step); // Generate a loop. - auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {}; - CanonicalLoopInfo *Loop = + auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) { + return Error::success(); + }; + Expected LoopResult = OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal, StepVal, IsSigned, InclusiveStop); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *Loop = *LoopResult; InsertPointTy AfterIP = Loop->getAfterIP(); // Tile the loop. 
@@ -2245,19 +2341,22 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) { Value *StartVal = ConstantInt::get(LCTy, 10); Value *StopVal = ConstantInt::get(LCTy, 52); Value *StepVal = ConstantInt::get(LCTy, 2); - auto LoopBodyGen = [&](InsertPointTy, Value *) {}; + auto LoopBodyGen = [&](InsertPointTy, Value *) { return Error::success(); }; - CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( Loc, LoopBodyGen, StartVal, StopVal, StepVal, false, false); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *CLI = *LoopResult; BasicBlock *Preheader = CLI->getPreheader(); Value *TripCount = CLI->getTripCount(); Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); - IRBuilder<>::InsertPoint AfterIP = OMPBuilder.applyWorkshareLoop( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.applyWorkshareLoop( DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static, nullptr, false, false, false, false, WorksharingLoopType::ForStaticLoop); - Builder.restoreIP(AfterIP); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -2306,11 +2405,15 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) { Value *StartVal = ConstantInt::get(LCTy, 10); Value *StopVal = ConstantInt::get(LCTy, 52); Value *StepVal = ConstantInt::get(LCTy, 2); - auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; + auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { + return Error::success(); + }; - CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( Loc, LoopBodyGen, StartVal, StopVal, StepVal, /*IsSigned=*/false, /*InclusiveStop=*/false); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *CLI = *LoopResult; BasicBlock *Preheader = CLI->getPreheader(); BasicBlock *Body = CLI->getBody(); Value *IV = CLI->getIndVar(); @@ -2319,8 +2422,9 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) { 
Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); InsertPointTy AllocaIP = Builder.saveIP(); - OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true, - OMP_SCHEDULE_Static); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.applyWorkshareLoop( + DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static); + assert(AfterIP && "unexpected error"); BasicBlock *Cond = Body->getSinglePredecessor(); Instruction *Cmp = &*Cond->begin(); @@ -2412,8 +2516,9 @@ TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) { Value *ChunkSize = ConstantInt::get(LCTy, 5); InsertPointTy AllocaIP{&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()}; - OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true, - OMP_SCHEDULE_Static, ChunkSize); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.applyWorkshareLoop( + DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkSize); + assert(AfterIP && "unexpected error"); OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -2500,11 +2605,15 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { Value *StepVal = ConstantInt::get(LCTy, 2); Value *ChunkVal = (ChunkSize == 1) ? 
nullptr : ConstantInt::get(LCTy, ChunkSize); - auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; + auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { + return Error::success(); + }; - CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( Loc, LoopBodyGen, StartVal, StopVal, StepVal, /*IsSigned=*/false, /*InclusiveStop=*/false); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *CLI = *LoopResult; Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); InsertPointTy AllocaIP = Builder.saveIP(); @@ -2517,7 +2626,7 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { BasicBlock *LatchBlock = CLI->getLatch(); Value *IV = CLI->getIndVar(); - InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop( + OpenMPIRBuilder::InsertPointOrErrorTy EndIP = OMPBuilder.applyWorkshareLoop( DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType), ChunkVal, /*Simd=*/false, (SchedType & omp::OMPScheduleType::ModifierMonotonic) == @@ -2525,10 +2634,11 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) == omp::OMPScheduleType::ModifierNonmonotonic, /*Ordered=*/false); + assert(EndIP && "unexpected error"); // The returned value should be the "after" point. - ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock()); - ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint()); + ASSERT_EQ(EndIP->getBlock(), AfterIP.getBlock()); + ASSERT_EQ(EndIP->getPoint(), AfterIP.getPoint()); auto AllocaIter = BB->begin(); ASSERT_GE(std::distance(BB->begin(), BB->end()), 4); @@ -2603,7 +2713,7 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { EXPECT_EQ(NumCallsInExitBlock, 2u); // Add a termination to our block and check that it is internally consistent. 
- Builder.restoreIP(EndIP); + Builder.restoreIP(*EndIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -2642,11 +2752,15 @@ TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) { Value *StopVal = ConstantInt::get(LCTy, 52); Value *StepVal = ConstantInt::get(LCTy, 2); Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize); - auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {}; + auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) { + return llvm::Error::success(); + }; - CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop( + Expected LoopResult = OMPBuilder.createCanonicalLoop( Loc, LoopBodyGen, StartVal, StopVal, StepVal, /*IsSigned=*/false, /*InclusiveStop=*/false); + assert(LoopResult && "unexpected error"); + CanonicalLoopInfo *CLI = *LoopResult; Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); InsertPointTy AllocaIP = Builder.saveIP(); @@ -2658,14 +2772,15 @@ TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) { BasicBlock *LatchBlock = CLI->getLatch(); Value *IV = CLI->getIndVar(); - InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop( + OpenMPIRBuilder::InsertPointOrErrorTy EndIP = OMPBuilder.applyWorkshareLoop( DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkVal, /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, /*HasOrderedClause=*/true); + assert(EndIP && "unexpected error"); // Add a termination to our block and check that it is internally consistent. 
- Builder.restoreIP(EndIP); + Builder.restoreIP(*EndIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -2749,7 +2864,10 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { EXPECT_NE(IPBB->end(), IP.getPoint()); }; - Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createMaster( + Builder, BODYGENCB_WRAPPER(BodyGenCB), FINICB_WRAPPER(FiniCB)); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Value *EntryBBTI = EntryBB->getTerminator(); EXPECT_NE(EntryBBTI, nullptr); EXPECT_TRUE(isa(EntryBBTI)); @@ -2827,8 +2945,10 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) { }; Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Builder.restoreIP( - OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, Filter)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createMasked( + Builder, BODYGENCB_WRAPPER(BodyGenCB), FINICB_WRAPPER(FiniCB), Filter); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Value *EntryBBTI = EntryBB->getTerminator(); EXPECT_NE(EntryBBTI, nullptr); EXPECT_TRUE(isa(EntryBBTI)); @@ -2893,8 +3013,11 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { }; BasicBlock *EntryBB = Builder.GetInsertBlock(); - Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB, - "testCRT", nullptr)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createCritical(Builder, BODYGENCB_WRAPPER(BodyGenCB), + FINICB_WRAPPER(FiniCB), "testCRT", nullptr); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); CallInst *CriticalEntryCI = nullptr; for (auto &EI : *EntryBB) { @@ -3141,8 +3264,11 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { // Test for "#omp ordered [threads]" BasicBlock *EntryBB = Builder.GetInsertBlock(); - Builder.restoreIP( - OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true)); + 
OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB), + FINICB_WRAPPER(FiniCB), true); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -3212,8 +3338,11 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { // Test for "#omp ordered simd" BasicBlock *EntryBB = Builder.GetInsertBlock(); - Builder.restoreIP( - OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB), + FINICB_WRAPPER(FiniCB), false); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -3326,8 +3455,11 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { EXPECT_NE(IPBB->end(), IP.getPoint()); }; - Builder.restoreIP( - OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ false)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB), + FINICB_WRAPPER(FiniCB), /*IsNowait*/ false); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Value *EntryBBTI = EntryBB->getTerminator(); EXPECT_NE(EntryBBTI, nullptr); EXPECT_TRUE(isa(EntryBBTI)); @@ -3416,8 +3548,11 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { EXPECT_NE(IPBB->end(), IP.getPoint()); }; - Builder.restoreIP( - OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ true)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB), + FINICB_WRAPPER(FiniCB), /*IsNowait*/ true); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Value *EntryBBTI = EntryBB->getTerminator(); EXPECT_NE(EntryBBTI, nullptr); EXPECT_TRUE(isa(EntryBBTI)); @@ -3535,9 +3670,11 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { 
EXPECT_NE(IPBB->end(), IP.getPoint()); }; - Builder.restoreIP(OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, - /*IsNowait*/ false, {CPVar}, - {CopyFunc})); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createSingle( + Builder, BODYGENCB_WRAPPER(BodyGenCB), FINICB_WRAPPER(FiniCB), + /*IsNowait*/ false, {CPVar}, {CopyFunc}); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Value *EntryBBTI = EntryBB->getTerminator(); EXPECT_NE(EntryBBTI, nullptr); EXPECT_TRUE(isa(EntryBBTI)); @@ -3798,8 +3935,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) { Sub = IRB.CreateSub(ConstVal, Atomic); return Sub; }; - Builder.restoreIP(OMPBuilder.createAtomicUpdate( - Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate( + Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); BasicBlock *ContBB = EntryBB->getSingleSuccessor(); BranchInst *ContTI = dyn_cast(ContBB->getTerminator()); EXPECT_NE(ContTI, nullptr); @@ -3865,8 +4004,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) { Sub = IRB.CreateFSub(ConstVal, Atomic); return Sub; }; - Builder.restoreIP(OMPBuilder.createAtomicUpdate( - Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate( + Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); BasicBlock *ContBB = EntryBB->getSingleSuccessor(); BranchInst *ContTI = dyn_cast(ContBB->getTerminator()); EXPECT_NE(ContTI, nullptr); @@ -3931,8 +4072,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) { Sub = IRB.CreateSub(ConstVal, Atomic); return Sub; }; - Builder.restoreIP(OMPBuilder.createAtomicUpdate( - Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart)); + 
OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate( + Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); BasicBlock *ContBB = EntryBB->getSingleSuccessor(); BranchInst *ContTI = dyn_cast(ContBB->getTerminator()); EXPECT_NE(ContTI, nullptr); @@ -4003,9 +4146,12 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) { // integer update - not used auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; }; - Builder.restoreIP(OMPBuilder.createAtomicCapture( - Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, UpdateExpr, - IsPostfixUpdate, IsXLHSInRHSPart)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createAtomicCapture(Builder, AllocaIP, X, V, Expr, AO, RMWOp, + UpdateOp, UpdateExpr, IsPostfixUpdate, + IsXLHSInRHSPart); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); EXPECT_EQ(EntryBB->getParent()->size(), 1U); AtomicRMWInst *ARWM = dyn_cast(Init->getNextNode()); EXPECT_NE(ARWM, nullptr); @@ -4361,12 +4507,15 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) { Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), &ThenTerm, &ElseTerm); + return Error::success(); }; OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - Builder.restoreIP(OMPBuilder.createTeams( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams( Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr, - /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr)); + /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -4423,14 +4572,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) { auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); + 
return Error::success(); }; // `F` has an argument - an integer, so we use that as the thread limit. - Builder.restoreIP(OMPBuilder.createTeams(/*=*/Builder, BodyGenCB, - /*NumTeamsLower=*/nullptr, - /*NumTeamsUpper=*/nullptr, - /*ThreadLimit=*/F->arg_begin(), - /*IfExpr=*/nullptr)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams( + /*=*/Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, + /*NumTeamsUpper=*/nullptr, /*ThreadLimit=*/F->arg_begin(), + /*IfExpr=*/nullptr); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4474,15 +4625,19 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) { auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); + return Error::success(); }; // `F` already has an integer argument, so we use that as upper bound to // `num_teams` - Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, - /*NumTeamsLower=*/nullptr, - /*NumTeamsUpper=*/F->arg_begin(), - /*ThreadLimit=*/nullptr, - /*IfExpr=*/nullptr)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createTeams(Builder, BodyGenCB, + /*NumTeamsLower=*/nullptr, + /*NumTeamsUpper=*/F->arg_begin(), + /*ThreadLimit=*/nullptr, + /*IfExpr=*/nullptr); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4531,13 +4686,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) { auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); + return Error::success(); }; // `F` already has an integer argument, so we use that as upper bound to // `num_teams` - Builder.restoreIP( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, - /*ThreadLimit=*/nullptr, 
/*IfExpr=*/nullptr)); + /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4593,11 +4751,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) { auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); + return Error::success(); }; OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); - Builder.restoreIP(OMPBuilder.createTeams( - Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams( + Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4644,13 +4805,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) { auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); + return Error::success(); }; // `F` already has an integer argument, so we use that as upper bound to // `num_teams` - Builder.restoreIP(OMPBuilder.createTeams( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams( Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr, - /*ThreadLimit=*/nullptr, IfExpr)); + /*ThreadLimit=*/nullptr, IfExpr); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4707,12 +4871,15 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) { auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); + return Error::success(); }; // `F` already has an integer argument, so we use that as upper bound to // `num_teams` - 
Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, - NumTeamsUpper, ThreadLimit, IfExpr)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams( + Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, IfExpr); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -4937,6 +5104,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { BodyIP = Builder.saveIP(); BodyAllocaIP = InnerAllocaIP; + return Error::success(); }; // Privatization for reduction creates local copies of reduction variables and @@ -4969,14 +5137,15 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { }; // Do nothing in finalization. - auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; - InsertPointTy AfterIP = + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); - Builder.restoreIP(AfterIP); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { {SumType, SumReduced, SumPrivatized, @@ -4989,10 +5158,12 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { bool ReduceVariableByRef[] = {false, false}; - OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos, - ReduceVariableByRef); + OpenMPIRBuilder::InsertPointOrErrorTy ReductionsIP = + OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos, + ReduceVariableByRef); + assert(ReductionsIP && "unexpected error"); - Builder.restoreIP(AfterIP); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(F); @@ -5172,6 +5343,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { FirstBodyIP = Builder.saveIP(); FirstBodyAllocaIP = InnerAllocaIP; + return Error::success(); }; 
InsertPointTy SecondBodyIP, SecondBodyAllocaIP; @@ -5190,6 +5362,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { SecondBodyIP = Builder.saveIP(); SecondBodyAllocaIP = InnerAllocaIP; + return Error::success(); }; // Privatization for reduction creates local copies of reduction variables and @@ -5224,36 +5397,44 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { }; // Do nothing in finalization. - auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; }; + auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; - Builder.restoreIP( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP1 = OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, FiniCB, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, - /* IsCancellable */ false)); - InsertPointTy AfterIP = OMPBuilder.createParallel( + /* IsCancellable */ false); + assert(AfterIP1 && "unexpected error"); + Builder.restoreIP(*AfterIP1); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP2 = OMPBuilder.createParallel( {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); + assert(AfterIP2 && "unexpected error"); + Builder.restoreIP(*AfterIP2); OMPBuilder.Config.setIsGPU(false); bool ReduceVariableByRef[] = {false}; - OMPBuilder.createReductions( - FirstBodyIP, FirstBodyAllocaIP, - {{SumType, SumReduced, SumPrivatized, - /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, - /*ReductionGenClang=*/nullptr, sumAtomicReduction}}, - ReduceVariableByRef); - OMPBuilder.createReductions( - SecondBodyIP, SecondBodyAllocaIP, - {{XorType, XorReduced, XorPrivatized, - /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction, - /*ReductionGenClang=*/nullptr, xorAtomicReduction}}, - ReduceVariableByRef); - - Builder.restoreIP(AfterIP); + OpenMPIRBuilder::InsertPointOrErrorTy ReductionsIP1 = + OMPBuilder.createReductions( + 
FirstBodyIP, FirstBodyAllocaIP, + {{SumType, SumReduced, SumPrivatized, + /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction, + /*ReductionGenClang=*/nullptr, sumAtomicReduction}}, + ReduceVariableByRef); + assert(ReductionsIP1 && "unexpected error"); + OpenMPIRBuilder::InsertPointOrErrorTy ReductionsIP2 = + OMPBuilder.createReductions( + SecondBodyIP, SecondBodyAllocaIP, + {{XorType, XorReduced, XorPrivatized, + /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction, + /*ReductionGenClang=*/nullptr, xorAtomicReduction}}, + ReduceVariableByRef); + assert(ReductionsIP2 && "unexpected error"); + + Builder.restoreIP(*AfterIP2); Builder.CreateRetVoid(); OMPBuilder.finalize(F); @@ -5320,8 +5501,10 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { llvm::SmallVector SectionCBVector; llvm::SmallVector CaseBBs; - auto FiniCB = [&](InsertPointTy IP) {}; - auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + auto FiniCB = [&](InsertPointTy IP) { return Error::success(); }; + auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + return Error::success(); + }; SectionCBVector.push_back(SectionCB); auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, @@ -5329,8 +5512,10 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { llvm::Value *&ReplVal) { return CodeGenIP; }; IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, FiniCB, false, false)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createSections( + Loc, AllocaIP, SectionCBVector, PrivCB, FiniCB, false, false); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); // Required at the end of the function EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr); EXPECT_FALSE(verifyModule(*M, &errs())); @@ -5371,6 +5556,7 @@ TEST_F(OpenMPIRBuilderTest, 
CreateSections) { Value *PrivLoad = Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca"); Builder.CreateICmpNE(F->arg_begin(), PrivLoad); + return Error::success(); }; auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { @@ -5383,8 +5569,11 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) { IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, FiniCB, false, false)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, PrivCB, + FINICB_WRAPPER(FiniCB), false, false); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); // Required at the end of the function // Switch BB's predecessor is loop condition BB, whose successor at index 1 is @@ -5468,10 +5657,12 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) { auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { return CodeGenIP; }; - auto FiniCB = [&](InsertPointTy IP) {}; + auto FiniCB = [&](InsertPointTy IP) { return Error::success(); }; - Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, - PrivCB, FiniCB, false, true)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createSections( + Loc, AllocaIP, SectionCBVector, PrivCB, FiniCB, false, true); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); // Required at the end of the function for (auto &Inst : instructions(*F)) { EXPECT_FALSE(isa(Inst) && @@ -5692,9 +5883,11 @@ TEST_F(OpenMPIRBuilderTest, TargetEnterData) { OMPBuilder.Config.setIsGPU(true); llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper; - Builder.restoreIP(OMPBuilder.createTargetData( + 
OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTargetData( Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), - /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); + /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); CallInst *TargetDataCall = dyn_cast(&BB->back()); EXPECT_NE(TargetDataCall, nullptr); @@ -5751,9 +5944,11 @@ TEST_F(OpenMPIRBuilderTest, TargetExitData) { OMPBuilder.Config.setIsGPU(true); llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper; - Builder.restoreIP(OMPBuilder.createTargetData( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTargetData( Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), - /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc)); + /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); CallInst *TargetDataCall = dyn_cast(&BB->back()); EXPECT_NE(TargetDataCall, nullptr); @@ -5859,9 +6054,12 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { return Builder.saveIP(); }; - Builder.restoreIP(OMPBuilder.createTargetData( - Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), - /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB)); + OpenMPIRBuilder::InsertPointOrErrorTy TargetDataIP1 = + OMPBuilder.createTargetData( + Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), + /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB); + assert(TargetDataIP1 && "unexpected error"); + Builder.restoreIP(*TargetDataIP1); CallInst *TargetDataCall = dyn_cast(&BB->back()); EXPECT_NE(TargetDataCall, nullptr); @@ -5884,9 +6082,12 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { EXPECT_EQ(TargetDataCall, nullptr); return Builder.saveIP(); }; - Builder.restoreIP(OMPBuilder.createTargetData( - Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), - /* IfCond= */ nullptr, Info, GenMapInfoCB, 
nullptr, BodyTargetCB)); + OpenMPIRBuilder::InsertPointOrErrorTy TargetDataIP2 = + OMPBuilder.createTargetData( + Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), + /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB); + assert(TargetDataIP2 && "unexpected error"); + Builder.restoreIP(*TargetDataIP2); EXPECT_TRUE(CheckDevicePassBodyGen); Builder.CreateRetVoid(); @@ -5981,9 +6182,11 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) { TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17); OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL}); - Builder.restoreIP(OMPBuilder.createTarget( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget( OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(), - EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB)); + EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -6089,11 +6292,13 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, /*Line=*/3, /*Count=*/0); - Builder.restoreIP( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1, /*NumThreads=*/0, CapturedArgs, GenMapInfoCB, - BodyGenCB, SimpleArgAccessorCB)); + BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -6238,11 +6443,13 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2, /*Line=*/3, /*Count=*/0); - Builder.restoreIP( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, EntryInfo, /*NumTeams=*/-1, /*NumThreads=*/0, CapturedArgs, 
GenMapInfoCB, - BodyGenCB, SimpleArgAccessorCB)); + BodyGenCB, SimpleArgAccessorCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); Builder.CreateRetVoid(); OMPBuilder.finalize(); @@ -6354,15 +6561,17 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) { Instruction *ThenTerm, *ElseTerm; SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(), &ThenTerm, &ElseTerm); + return Error::success(); }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); - Builder.restoreIP(OMPBuilder.createTask( - Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTask( + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -6460,15 +6669,18 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + return Error::success(); + }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); - Builder.restoreIP(OMPBuilder.createTask( - Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTask( + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -6490,14 +6702,18 @@ 
TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + return Error::success(); + }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); - Builder.restoreIP(OMPBuilder.createTask( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB, - /*Tied=*/false)); + /*Tied=*/false); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -6520,7 +6736,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + return Error::success(); + }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); OpenMPIRBuilder::LocationDescription Loc( @@ -6532,9 +6750,11 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { Type::getInt32Ty(M->getContext()), InDep); DDS.push_back(DDIn); } - Builder.restoreIP(OMPBuilder.createTask( + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB, - /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS)); + /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -6594,7 +6814,9 @@ 
TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + return Error::success(); + }; BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); Builder.SetInsertPoint(BodyBB); @@ -6602,8 +6824,11 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { CmpInst::Predicate::ICMP_EQ, F->getArg(0), ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); - Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, - /*Tied=*/false, Final)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, + /*Tied=*/false, Final); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -6648,7 +6873,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {}; + auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + return Error::success(); + }; BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP(); Builder.SetInsertPoint(BodyBB); @@ -6656,9 +6883,11 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { CmpInst::Predicate::ICMP_EQ, F->getArg(0), ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); - Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, - /*Tied=*/false, /*Final=*/nullptr, - IfCondition)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = + OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, + 
/*Tied=*/false, /*Final=*/nullptr, IfCondition); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -6742,15 +6971,17 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { SplitBlockAndInsertIfThenElse(InternalIfCmp, CodeGenIP.getBlock()->getTerminator(), &ThenTerm, &ElseTerm); + return Error::success(); }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); - Builder.restoreIP(OMPBuilder.createTaskgroup( - Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTaskgroup( + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -6823,9 +7054,13 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64); Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64)); Builder.CreateStore(AddInst, Alloca64); + return Error::success(); }; OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); - Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1)); + OpenMPIRBuilder::InsertPointOrErrorTy TaskIP1 = + OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1); + assert(TaskIP1 && "unexpected error"); + Builder.restoreIP(*TaskIP1); auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { Builder.restoreIP(CodeGenIP); @@ -6833,18 +7068,24 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32); Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32)); Builder.CreateStore(AddInst, Alloca32); + return Error::success(); }; 
OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL); - Builder.restoreIP(OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2)); + OpenMPIRBuilder::InsertPointOrErrorTy TaskIP2 = + OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2); + assert(TaskIP2 && "unexpected error"); + Builder.restoreIP(*TaskIP2); + return Error::success(); }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL); - Builder.restoreIP(OMPBuilder.createTaskgroup( - Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB)); + OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTaskgroup( + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB); + assert(AfterIP && "unexpected error"); + Builder.restoreIP(*AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 27cd38dc3c62d95..fc2f88b766f1c56 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -137,9 +137,9 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, /// region, and a branch from any block with an successor-less OpenMP terminator /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes /// of the continuation block if provided. 
-static llvm::BasicBlock *convertOmpOpRegions( +static llvm::Expected convertOmpOpRegions( Region &region, StringRef blockName, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus, + LLVM::ModuleTranslation &moduleTranslation, SmallVectorImpl *continuationBlockPHIs = nullptr) { llvm::BasicBlock *continuationBlock = splitBB(builder, true, "omp.region.cont"); @@ -215,10 +215,8 @@ static llvm::BasicBlock *convertOmpOpRegions( llvm::IRBuilderBase::InsertPointGuard guard(builder); if (failed( - moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) { - bodyGenStatus = failure(); - return continuationBlock; - } + moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) + return llvm::createStringError("failed region translation"); // Special handling for `omp.yield` and `omp.terminator` (we may have more // than one): they return the control to the parent OpenMP dialect operation @@ -270,21 +268,19 @@ convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { auto maskedOp = cast(opInst); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. - LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // MaskedOp has only one region associated with it. auto &region = maskedOp.getRegion(); builder.restoreIP(codeGenIP); - convertOmpOpRegions(region, "omp.masked.region", builder, moduleTranslation, - bodyGenStatus); + return convertOmpOpRegions(region, "omp.masked.region", builder, + moduleTranslation) + .takeError(); }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) {}; + auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; llvm::Value *filterVal = nullptr; if (auto filterVar = maskedOp.getFilteredThreadId()) { @@ -296,8 +292,14 @@ convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, } assert(filterVal != nullptr); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMasked( - ompLoc, bodyGenCB, finiCB, filterVal)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB, + finiCB, filterVal); + + if (!afterIP) + return opInst.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); return success(); } @@ -306,25 +308,28 @@ static LogicalResult convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. - LogicalResult bodyGenStatus = success(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // MasterOp has only one region associated with it. auto ®ion = cast(opInst).getRegion(); builder.restoreIP(codeGenIP); - convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation, - bodyGenStatus); + return convertOmpOpRegions(region, "omp.master.region", builder, + moduleTranslation) + .takeError(); }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) {}; + auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster( - ompLoc, bodyGenCB, finiCB)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB, + finiCB); + + if (!afterIP) + return opInst.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); return success(); } @@ -334,21 +339,19 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; auto criticalOp = cast(opInst); - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. - LogicalResult bodyGenStatus = success(); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // CriticalOp has only one region associated with it. auto ®ion = cast(opInst).getRegion(); builder.restoreIP(codeGenIP); - convertOmpOpRegions(region, "omp.critical.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(region, "omp.critical.region", builder, + moduleTranslation) + .takeError(); }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) {}; + auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext(); @@ -366,8 +369,14 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), static_cast(criticalDeclareOp.getHint())); } - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical( - ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint)); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createCritical( + ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint); + + if (!afterIP) + return opInst.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); return success(); } @@ -468,27 +477,30 @@ static LogicalResult inlineConvertOmpRegions( return success(); } - LogicalResult bodyGenStatus = success(); SmallVector phis; - llvm::BasicBlock *continuationBlock = convertOmpOpRegions( - region, blockName, builder, moduleTranslation, bodyGenStatus, &phis); - if (failed(bodyGenStatus)) - return failure(); + llvm::Expected continuationBlock = + convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis); + + if (!continuationBlock) + return region.getParentOp()->emitError( + llvm::toString(continuationBlock.takeError())); + if (continuationBlockArgs) llvm::append_range(*continuationBlockArgs, phis); - builder.SetInsertPoint(continuationBlock, - continuationBlock->getFirstInsertionPt()); + builder.SetInsertPoint(*continuationBlock, + (*continuationBlock)->getFirstInsertionPt()); return success(); } namespace { /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to /// store lambdas with capture. 
-using OwningReductionGen = std::function; +using OwningReductionGen = + std::function; using OwningAtomicReductionGen = - std::function; } // namespace @@ -505,19 +517,20 @@ makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder, OwningReductionGen gen = [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Value *lhs, llvm::Value *rhs, - llvm::Value *&result) mutable { - moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs); - moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs); - builder.restoreIP(insertPoint); - SmallVector phis; - if (failed(inlineConvertOmpRegions(decl.getReductionRegion(), - "omp.reduction.nonatomic.body", - builder, moduleTranslation, &phis))) - return llvm::OpenMPIRBuilder::InsertPointTy(); - assert(phis.size() == 1); - result = phis[0]; - return builder.saveIP(); - }; + llvm::Value *&result) mutable + -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { + moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs); + moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs); + builder.restoreIP(insertPoint); + SmallVector phis; + if (failed(inlineConvertOmpRegions(decl.getReductionRegion(), + "omp.reduction.nonatomic.body", builder, + moduleTranslation, &phis))) + return llvm::createStringError("failed reduction region translation"); + assert(phis.size() == 1); + result = phis[0]; + return builder.saveIP(); + }; return gen; } @@ -537,18 +550,19 @@ makeAtomicReductionGen(omp::DeclareReductionOp decl, // avoid the dangling reference after the parent function returns. 
OwningAtomicReductionGen atomicGen = [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *, - llvm::Value *lhs, llvm::Value *rhs) mutable { - moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs); - moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs); - builder.restoreIP(insertPoint); - SmallVector phis; - if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(), - "omp.reduction.atomic.body", builder, - moduleTranslation, &phis))) - return llvm::OpenMPIRBuilder::InsertPointTy(); - assert(phis.empty()); - return builder.saveIP(); - }; + llvm::Value *lhs, llvm::Value *rhs) mutable + -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { + moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs); + moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs); + builder.restoreIP(insertPoint); + SmallVector phis; + if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(), + "omp.reduction.atomic.body", builder, + moduleTranslation, &phis))) + return llvm::createStringError("failed reduction region translation"); + assert(phis.empty()); + return builder.saveIP(); + }; return atomicGen; } @@ -593,27 +607,29 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, if (orderedRegionOp.getParLevelSimd()) return failure(); - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. - LogicalResult bodyGenStatus = success(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // OrderedOp has only one region associated with it. auto ®ion = cast(opInst).getRegion(); builder.restoreIP(codeGenIP); - convertOmpOpRegions(region, "omp.ordered.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(region, "omp.ordered.region", builder, + moduleTranslation) + .takeError(); }; // TODO: Perform finalization actions for variables. 
This has to be // called for variables which have destructors/finalizers. - auto finiCB = [&](InsertPointTy codeGenIP) {}; + auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP( + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd( - ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd())); - return bodyGenStatus; + ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd()); + + if (!afterIP) + return opInst.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); + return success(); } namespace { @@ -811,15 +827,24 @@ static LogicalResult createReductionsAndCleanup( // and remove it later. llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); builder.SetInsertPoint(tempTerminator); - llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = + llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, isByRef, op.getNowait()); - if (!contInsertPoint.getBlock()) + + if (!contInsertPoint) + return op.emitError(llvm::toString(contInsertPoint.takeError())); + + if (!contInsertPoint->getBlock()) return op->emitOpError() << "failed to convert reductions"; - auto nextInsertionPoint = - ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for); + + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for); + + if (!afterIP) + return op.emitError(llvm::toString(afterIP.takeError())); + tempTerminator->eraseFromParent(); - builder.restoreIP(nextInsertionPoint); + builder.restoreIP(*afterIP); // after the construct, deallocate private reduction variables SmallVector reductionRegions; @@ -958,7 +983,6 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation::SaveStack mappingGuard( 
moduleTranslation, reductionVariableMap); - LogicalResult bodyGenStatus = success(); SmallVector sectionCBs; for (Operation &op : *sectionsOp.getRegion().begin()) { @@ -967,9 +991,8 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, continue; Region ®ion = sectionOp.getRegion(); - auto sectionCB = [§ionsOp, ®ion, &builder, &moduleTranslation, - &bodyGenStatus](InsertPointTy allocaIP, - InsertPointTy codeGenIP) { + auto sectionCB = [§ionsOp, ®ion, &builder, &moduleTranslation]( + InsertPointTy allocaIP, InsertPointTy codeGenIP) { builder.restoreIP(codeGenIP); // map the omp.section reduction block argument to the omp.sections block @@ -985,8 +1008,9 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, moduleTranslation.mapValue(sectionArg, llvmVal); } - convertOmpOpRegions(region, "omp.section.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(region, "omp.section.region", builder, + moduleTranslation) + .takeError(); }; sectionCBs.push_back(sectionCB); } @@ -1003,24 +1027,27 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, // attribute (shared, private, firstprivate, ...) of variables. // Currently defaults to shared. auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &, - llvm::Value &vPtr, - llvm::Value *&replacementValue) -> InsertPointTy { + llvm::Value &vPtr, llvm::Value *&replacementValue) + -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { replacementValue = &vPtr; return codeGenIP; }; // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) {}; + auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections( - ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, - sectionsOp.getNowait())); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createSections( + ompLoc, allocaIP, sectionCBs, privCB, finiCB, false, + sectionsOp.getNowait()); + + if (!afterIP) + return opInst.emitError(llvm::toString(afterIP.takeError())); - if (failed(bodyGenStatus)) - return bodyGenStatus; + builder.restoreIP(*afterIP); // Process the reductions if required. return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation, @@ -1034,16 +1061,17 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - LogicalResult bodyGenStatus = success(); + if (!singleOp.getPrivateVars().empty() || singleOp.getPrivateSyms()) return singleOp.emitError("unhandled clauses for translation to LLVM IR"); auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { builder.restoreIP(codegenIP); - convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", + builder, moduleTranslation) + .takeError(); }; - auto finiCB = [&](InsertPointTy codeGenIP) {}; + auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; // Handle copyprivate Operation::operand_range cpVars = singleOp.getCopyprivateVars(); @@ -1058,9 +1086,16 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, 
moduleTranslation.lookupFunction(llvmFuncOp.getName())); } - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle( - ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, llvmCPFuncs)); - return bodyGenStatus; + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createSingle( + ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, + llvmCPFuncs); + + if (!afterIP) + return singleOp.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); + return success(); } // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder @@ -1068,7 +1103,6 @@ static LogicalResult convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - LogicalResult bodyGenStatus = success(); if (!op.getAllocatorVars().empty() || op.getReductionSyms() || !op.getPrivateVars().empty() || op.getPrivateSyms()) return op.emitError("unhandled clauses for translation to LLVM IR"); @@ -1077,8 +1111,9 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation::SaveStack frame( moduleTranslation, allocaIP); builder.restoreIP(codegenIP); - convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, + moduleTranslation) + .takeError(); }; llvm::Value *numTeamsLower = nullptr; @@ -1098,9 +1133,15 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, ifExpr = moduleTranslation.lookupValue(ifVar); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams( - ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr)); - return bodyGenStatus; + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createTeams( + ompLoc, bodyCB, 
numTeamsLower, numTeamsUpper, threadLimit, ifExpr); + + if (!afterIP) + return op.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); + return success(); } static void @@ -1134,7 +1175,6 @@ static LogicalResult convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - LogicalResult bodyGenStatus = success(); if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() || taskOp.getInReductionSyms() || taskOp.getPriority() || !taskOp.getAllocateVars().empty() || !taskOp.getPrivateVars().empty() || @@ -1148,8 +1188,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, moduleTranslation, allocaIP); builder.restoreIP(codegenIP); - convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder, + moduleTranslation) + .takeError(); }; SmallVector dds; @@ -1159,11 +1200,17 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask( - ompLoc, allocaIP, bodyCB, !taskOp.getUntied(), - moduleTranslation.lookupValue(taskOp.getFinal()), - moduleTranslation.lookupValue(taskOp.getIfExpr()), dds)); - return bodyGenStatus; + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createTask( + ompLoc, allocaIP, bodyCB, !taskOp.getUntied(), + moduleTranslation.lookupValue(taskOp.getFinal()), + moduleTranslation.lookupValue(taskOp.getIfExpr()), dds); + + if (!afterIP) + return taskOp.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); + return success(); } /// Converts an OpenMP taskgroup construct into LLVM IR 
using OpenMPIRBuilder. @@ -1171,20 +1218,27 @@ static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - LogicalResult bodyGenStatus = success(); - if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) { + if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) return tgOp.emitError("unhandled clauses for translation to LLVM IR"); - } + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { builder.restoreIP(codegenIP); - convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", + builder, moduleTranslation) + .takeError(); }; + InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup( - ompLoc, allocaIP, bodyCB)); - return bodyGenStatus; + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP, + bodyCB); + + if (!afterIP) + return tgOp.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); + return success(); } static LogicalResult @@ -1258,12 +1312,10 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); // Generator of the canonical loop body. - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. 
SmallVector loopInfos; SmallVector bodyInsertPoints; - LogicalResult bodyGenStatus = success(); - auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { + auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, + llvm::Value *iv) -> llvm::Error { // Make sure further conversions know about the induction variable. moduleTranslation.mapValue( loopOp.getRegion().front().getArgument(loopInfos.size()), iv); @@ -1274,12 +1326,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, bodyInsertPoints.push_back(ip); if (loopInfos.size() != loopOp.getNumLoops() - 1) - return; + return llvm::Error::success(); // Convert the body of the loop. builder.restoreIP(ip); - convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder, + moduleTranslation) + .takeError(); }; // Delegate actual loop construction to the OpenMP IRBuilder. @@ -1304,12 +1357,16 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back()); computeIP = loopInfos.front()->getPreheaderIP(); } - loopInfos.push_back(ompBuilder->createCanonicalLoop( - loc, bodyGen, lowerBound, upperBound, step, - /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP)); - if (failed(bodyGenStatus)) - return failure(); + llvm::Expected result = + ompBuilder->createCanonicalLoop( + loc, bodyGen, lowerBound, upperBound, step, + /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP); + + if (!result) + return loopOp.emitError(llvm::toString(result.takeError())); + + loopInfos.push_back(*result); } // Collapse loops. 
Store the insertion point because LoopInfos may get @@ -1325,11 +1382,15 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, std::optional scheduleMod = wsloopOp.getScheduleMod(); bool isSimd = wsloopOp.getScheduleSimd(); - ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), - convertToScheduleKind(schedule), chunk, isSimd, - scheduleMod == omp::ScheduleModifier::monotonic, - scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = + ompBuilder->applyWorkshareLoop( + ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(), + convertToScheduleKind(schedule), chunk, isSimd, + scheduleMod == omp::ScheduleModifier::monotonic, + scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered); + + if (!wsloopIP) + return opInst.emitError(llvm::toString(wsloopIP.takeError())); // Continue building IR after the loop. Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for @@ -1350,10 +1411,6 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; ArrayRef isByRef = getIsByRef(opInst.getReductionByref()); assert(isByRef.size() == opInst.getNumReductionVars()); - - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. 
- LogicalResult bodyGenStatus = success(); llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); // Collect delayed privatization declarations @@ -1372,7 +1429,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, opInst.getNumReductionVars()); SmallVector deferredStores; - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocaIP, + InsertPointTy codeGenIP) -> llvm::Error { // Allocate private vars llvm::BranchInst *allocaTerminator = llvm::cast(allocaIP.getBlock()->getTerminator()); @@ -1418,10 +1476,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, builder.SetInsertPoint(privAllocBlock->getTerminator()); } if (failed(inlineConvertOmpRegions(allocRegion, "omp.private.alloc", - builder, moduleTranslation, &phis))) { - bodyGenStatus = failure(); - return; - } + builder, moduleTranslation, &phis))) + return llvm::createStringError( + "failed to inline `alloc` region of an `omp.private` op in the " + "parallel region"); + assert(phis.size() == 1 && "expected one allocation to be yielded"); moduleTranslation.mapValue(privateBlockArgs[i], phis[0]); @@ -1447,7 +1506,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, opInst, reductionArgs, builder, moduleTranslation, allocaIP, reductionDecls, privateReductionVariables, reductionVariableMap, deferredStores, isByRef))) - bodyGenStatus = failure(); + return llvm::createStringError("failed reduction vars allocation"); // Apply copy region for firstprivate. 
bool needsFirstprivate = @@ -1486,10 +1545,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // in-place convert copy region builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator()); if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy", - builder, moduleTranslation))) { - bodyGenStatus = failure(); - return; - } + builder, moduleTranslation))) + return llvm::createStringError( + "failed to inline `copy` region of an `omp.private` op in the " + "parallel region"); // ignore unused value yielded from copy region @@ -1538,7 +1597,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, if (failed(inlineConvertOmpRegions( reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral", builder, moduleTranslation, &phis))) - bodyGenStatus = failure(); + return llvm::createStringError( + "failed to inline `init` region of an `omp.declare_reduction` op " + "in the parallel region"); assert(phis.size() == 1 && "expected one value to be yielded from the " "reduction neutral element declaration region"); @@ -1582,9 +1643,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // ParallelOp has only one region associated with it. builder.restoreIP(codeGenIP); - auto regionBlock = - convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder, - moduleTranslation, bodyGenStatus); + llvm::Expected regionBlock = convertOmpOpRegions( + opInst.getRegion(), "omp.par.region", builder, moduleTranslation); + if (!regionBlock) + return regionBlock.takeError(); // Process the reductions if required. 
if (opInst.getNumReductionVars() > 0) { @@ -1597,23 +1659,25 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, privateReductionVariables, reductionInfos); // Move to region cont block - builder.SetInsertPoint(regionBlock->getTerminator()); + builder.SetInsertPoint((*regionBlock)->getTerminator()); // Generate reductions from info llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); builder.SetInsertPoint(tempTerminator); - llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint = + llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos, isByRef, false); - if (!contInsertPoint.getBlock()) { - bodyGenStatus = opInst->emitOpError() << "failed to convert reductions"; - return; - } + if (!contInsertPoint) + return contInsertPoint.takeError(); + + if (!contInsertPoint->getBlock()) + return llvm::createStringError("failed to convert reductions"); tempTerminator->eraseFromParent(); - builder.restoreIP(contInsertPoint); + builder.restoreIP(*contInsertPoint); } + return llvm::Error::success(); }; auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP, @@ -1626,7 +1690,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) { + auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error { InsertPointTy oldIP = builder.saveIP(); builder.restoreIP(codeGenIP); @@ -1640,7 +1704,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, if (failed(inlineOmpRegionCleanup( reductionCleanupRegions, privateReductionVariables, moduleTranslation, builder, "omp.reduction.cleanup"))) - bodyGenStatus = failure(); + return llvm::createStringError( + "failed to inline `cleanup` region of an `omp.declare_reduction` op " + "in the parallel region"); SmallVector privateCleanupRegions; llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions), @@ -1651,9 +1717,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, if (failed(inlineOmpRegionCleanup( privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder, "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false))) - bodyGenStatus = failure(); + return llvm::createStringError("failed to inline `dealloc` region of an " + "`omp.private` op in the parallel region"); builder.restoreIP(oldIP); + return llvm::Error::success(); }; llvm::Value *ifCond = nullptr; @@ -1672,11 +1740,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP( + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB, - ifCond, numThreads, pbKind, isCancellable)); + ifCond, numThreads, pbKind, isCancellable); + if (!afterIP) + return opInst.emitError(llvm::toString(afterIP.takeError())); - return bodyGenStatus; + builder.restoreIP(*afterIP); + return success(); } /// Convert Order attribute to llvm::omp::OrderKind. 
@@ -1718,12 +1789,10 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); // Generator of the canonical loop body. - // TODO: support error propagation in OpenMPIRBuilder and use it instead of - // relying on captured variables. SmallVector loopInfos; SmallVector bodyInsertPoints; - LogicalResult bodyGenStatus = success(); - auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) { + auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, + llvm::Value *iv) -> llvm::Error { // Make sure further conversions know about the induction variable. moduleTranslation.mapValue( loopOp.getRegion().front().getArgument(loopInfos.size()), iv); @@ -1734,12 +1803,13 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, bodyInsertPoints.push_back(ip); if (loopInfos.size() != loopOp.getNumLoops() - 1) - return; + return llvm::Error::success(); // Convert the body of the loop. builder.restoreIP(ip); - convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder, - moduleTranslation, bodyGenStatus); + return convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder, + moduleTranslation) + .takeError(); }; // Delegate actual loop construction to the OpenMP IRBuilder. @@ -1765,12 +1835,16 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, ompLoc.DL); computeIP = loopInfos.front()->getPreheaderIP(); } - loopInfos.push_back(ompBuilder->createCanonicalLoop( - loc, bodyGen, lowerBound, upperBound, step, - /*IsSigned=*/true, /*Inclusive=*/true, computeIP)); - if (failed(bodyGenStatus)) - return failure(); + llvm::Expected result = + ompBuilder->createCanonicalLoop( + loc, bodyGen, lowerBound, upperBound, step, + /*IsSigned=*/true, /*InclusiveStop=*/true, computeIP); + + if (!result) + return loopOp->emitError(llvm::toString(result.takeError())); + + loopInfos.push_back(*result); } // Collapse loops. 
@@ -1921,18 +1995,17 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, convertAtomicOrdering(opInst.getMemoryOrder()); // Generate update code. - LogicalResult updateGenStatus = success(); - auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus]( - llvm::Value *atomicx, - llvm::IRBuilder<> &builder) -> llvm::Value * { + auto updateFn = + [&opInst, &moduleTranslation]( + llvm::Value *atomicx, + llvm::IRBuilder<> &builder) -> llvm::Expected { Block &bb = *opInst.getRegion().begin(); moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx); moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); - if (failed(moduleTranslation.convertBlock(bb, true, builder))) { - updateGenStatus = (opInst.emitError() - << "unable to convert update operation to llvm IR"); - return nullptr; - } + if (failed(moduleTranslation.convertBlock(bb, true, builder))) + return llvm::createStringError( + "unable to convert update operation to llvm IR"); + omp::YieldOp yieldop = dyn_cast(bb.getTerminator()); assert(yieldop && yieldop.getResults().size() == 1 && "terminator must be omp.yield op and it must have exactly one " @@ -1943,10 +2016,16 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, // Handle ambiguous alloca, if any. 
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(ompBuilder->createAtomicUpdate( - ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn, - isXBinopExpr)); - return updateGenStatus; + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr, + atomicOrdering, binop, updateFn, + isXBinopExpr); + + if (!afterIP) + return opInst.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); + return success(); } static LogicalResult @@ -2007,20 +2086,19 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, llvm::AtomicOrdering atomicOrdering = convertAtomicOrdering(atomicCaptureOp.getMemoryOrder()); - LogicalResult updateGenStatus = success(); - auto updateFn = [&](llvm::Value *atomicx, - llvm::IRBuilder<> &builder) -> llvm::Value * { + auto updateFn = + [&](llvm::Value *atomicx, + llvm::IRBuilder<> &builder) -> llvm::Expected { if (atomicWriteOp) return moduleTranslation.lookupValue(atomicWriteOp.getExpr()); Block &bb = *atomicUpdateOp.getRegion().begin(); moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(), atomicx); moduleTranslation.mapBlock(&bb, builder.GetInsertBlock()); - if (failed(moduleTranslation.convertBlock(bb, true, builder))) { - updateGenStatus = (atomicUpdateOp.emitError() - << "unable to convert update operation to llvm IR"); - return nullptr; - } + if (failed(moduleTranslation.convertBlock(bb, true, builder))) + return llvm::createStringError( + "unable to convert update operation to llvm IR"); + omp::YieldOp yieldop = dyn_cast(bb.getTerminator()); assert(yieldop && yieldop.getResults().size() == 1 && "terminator must be omp.yield op and it must have exactly one " @@ -2031,10 +2109,16 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, // Handle ambiguous alloca, if any. 
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - builder.restoreIP(ompBuilder->createAtomicCapture( - ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering, - binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr)); - return updateGenStatus; + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = + ompBuilder->createAtomicCapture( + ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering, + binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr); + + if (!afterIP) + return atomicCaptureOp.emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); + return success(); } /// Converts an OpenMP Threadprivate operation into LLVM IR using @@ -3019,8 +3103,8 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, }; using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; - LogicalResult bodyGenStatus = success(); - auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) { + auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) + -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { assert(isa(op) && "BodyGen requested for non TargetDataOp"); auto blockArgIface = cast(op); @@ -3046,8 +3130,10 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, return info.DevicePtrInfoMap[basePointer].second; }); - bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region", - builder, moduleTranslation); + if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder, + moduleTranslation))) + return llvm::createStringError( + "failed to inline region of an `omp.target_data` op"); } break; case BodyGenTy::DupNoPriv: @@ -3067,8 +3153,10 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, mapData.BasePointers, mapData.DevicePointers); } - bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region", - builder, moduleTranslation); + if 
(failed(inlineConvertOmpRegions(region, "omp.data.region", builder, + moduleTranslation))) + return llvm::createStringError( + "failed to inline region of an `omp.target_data` op"); } break; } @@ -3078,17 +3166,21 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); - if (isa(op)) { - builder.restoreIP(ompBuilder->createTargetData( - ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond, - info, genMapInfoCB, nullptr, bodyGenCB)); - } else { - builder.restoreIP(ompBuilder->createTargetData( - ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond, - info, genMapInfoCB, &RTLFn)); - } - - return bodyGenStatus; + llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() { + if (isa(op)) + return ompBuilder->createTargetData( + ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), + ifCond, info, genMapInfoCB, nullptr, bodyGenCB); + return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(), + builder.getInt64(deviceID), ifCond, + info, genMapInfoCB, &RTLFn); + }(); + + if (!afterIP) + return op->emitError(llvm::toString(afterIP.takeError())); + + builder.restoreIP(*afterIP); + return success(); } /// Lowers the FlagsAttr which is applied to the module on the device @@ -3320,6 +3412,8 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg, } case omp::VariableCaptureKind::This: case omp::VariableCaptureKind::VLAType: + // TODO: Consider returning error to use standard reporting for + // unimplemented features. 
assert(false && "Currently unsupported capture kind"); break; } @@ -3350,10 +3444,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, bool isOffloadEntry = isTargetDevice || !ompBuilder->Config.TargetTriples.empty(); - LogicalResult bodyGenStatus = success(); using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - auto bodyCB = [&](InsertPointTy allocaIP, - InsertPointTy codeGenIP) -> InsertPointTy { + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) + -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { // Forward target-cpu and target-features function attributes from the // original function to the new outlined function. llvm::Function *llvmParentFn = @@ -3396,34 +3489,35 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, if (privatizer.getDataSharingType() == omp::DataSharingClauseType::FirstPrivate || !privatizer.getDeallocRegion().empty()) { - opInst.emitError("Translation of omp.target from MLIR to LLVMIR " - "failed because translation of firstprivate and " - " private allocatables is not supported yet"); - bodyGenStatus = failure(); - } else { - moduleTranslation.mapValue(privatizer.getAllocMoldArg(), - moduleTranslation.lookupValue(privVar)); - Region &allocRegion = privatizer.getAllocRegion(); - SmallVector yieldedValues; - if (failed(inlineConvertOmpRegions( - allocRegion, "omp.targetop.privatizer", builder, - moduleTranslation, &yieldedValues))) { - opInst.emitError( - "failed to inline `alloc` region of an `omp.private` " - "op in the target region"); - bodyGenStatus = failure(); - } else { - assert(yieldedValues.size() == 1); - moduleTranslation.mapValue(privBlockArg, yieldedValues.front()); - } - moduleTranslation.forgetMapping(allocRegion); - builder.restoreIP(builder.saveIP()); + return llvm::createStringError( + "Translation of omp.target from MLIR to LLVMIR " + "failed because translation of firstprivate and " + " private allocatables is not supported yet"); } + 
moduleTranslation.mapValue(privatizer.getAllocMoldArg(), + moduleTranslation.lookupValue(privVar)); + Region &allocRegion = privatizer.getAllocRegion(); + SmallVector yieldedValues; + if (failed(inlineConvertOmpRegions( + allocRegion, "omp.targetop.privatizer", builder, + moduleTranslation, &yieldedValues))) { + return llvm::createStringError( + "failed to inline `alloc` region of an `omp.private` " + "op in the target region"); + } + assert(yieldedValues.size() == 1); + moduleTranslation.mapValue(privBlockArg, yieldedValues.front()); + moduleTranslation.forgetMapping(allocRegion); + builder.restoreIP(builder.saveIP()); } } - llvm::BasicBlock *exitBlock = convertOmpOpRegions( - targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus); - builder.SetInsertPoint(exitBlock); + + llvm::Expected exitBlock = convertOmpOpRegions( + targetRegion, "omp.target", builder, moduleTranslation); + if (!exitBlock) + return exitBlock.takeError(); + + builder.SetInsertPoint(*exitBlock); return builder.saveIP(); }; @@ -3455,7 +3549,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input, llvm::Value *&retVal, InsertPointTy allocaIP, - InsertPointTy codeGenIP) { + InsertPointTy codeGenIP) + -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { // We just return the unaltered argument for the host function // for now, some alterations may be required in the future to // keep host fallback functions working identically to the device @@ -3486,10 +3581,16 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(), moduleTranslation, dds); - builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget( - ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo, - defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB, - argAccessorCB, dds, targetOp.getNowait())); + 
llvm::OpenMPIRBuilder::InsertPointOrErrorTy result = + moduleTranslation.getOpenMPBuilder()->createTarget( + ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo, + defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB, + argAccessorCB, dds, targetOp.getNowait()); + + if (!result) + return opInst.emitError(llvm::toString(result.takeError())); + + builder.restoreIP(*result); // Remap access operations to declare target reference pointers for the // device, essentially generating extra loadop's as necessary @@ -3497,7 +3598,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, handleDeclareTargetMapVar(mapData, moduleTranslation, builder, llvmOutlinedFn); - return bodyGenStatus; + return success(); } static LogicalResult @@ -3618,8 +3719,13 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); return llvm::TypeSwitch(op) - .Case([&](omp::BarrierOp) { - ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier); + .Case([&](omp::BarrierOp) -> LogicalResult { + llvm::OpenMPIRBuilder::InsertPointOrErrorTy result = + ompBuilder->createBarrier(builder.saveIP(), + llvm::omp::OMPD_barrier); + if (!result) + return op->emitError(llvm::toString(result.takeError())); + return success(); }) .Case([&](omp::TaskyieldOp) { From 85af1926f74e88e14c9f58946537e025a74ccf7e Mon Sep 17 00:00:00 2001 From: Abid Qadeer Date: Fri, 25 Oct 2024 11:43:25 +0100 Subject: [PATCH 004/425] [flang][debug] Support mlir::NoneType. 
(#113550) --- .../Optimizer/Transforms/DebugTypeGenerator.cpp | 2 ++ flang/test/Transforms/debug-none-type.fir | 14 ++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 flang/test/Transforms/debug-none-type.fir diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 1ab6c76dae8eda7..e387e27533a0060 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -581,6 +581,8 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, /*genAssociated=*/false); } else if (auto vecTy = mlir::dyn_cast_or_null(Ty)) { return convertVectorType(vecTy, fileAttr, scope, declOp); + } else if (mlir::isa(Ty)) { + return mlir::LLVM::DINullTypeAttr::get(context); } else if (auto boxTy = mlir::dyn_cast_or_null(Ty)) { auto elTy = boxTy.getElementType(); if (auto seqTy = mlir::dyn_cast_or_null(elTy)) diff --git a/flang/test/Transforms/debug-none-type.fir b/flang/test/Transforms/debug-none-type.fir new file mode 100644 index 000000000000000..5eee6a63321904f --- /dev/null +++ b/flang/test/Transforms/debug-none-type.fir @@ -0,0 +1,14 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func private @_FortranAAssign(i8) -> none loc(#loc1) + func.func private @foo() -> !fir.ref loc(#loc2) +} +#loc1 = loc("test.f90":5:1) +#loc2 = loc("test.f90":15:1) + +// CHECK: #[[INT8_TY:.*]] = #llvm.di_basic_type +// CHECK: #[[NONE_TY:.*]] = #llvm.di_null_type +// CHECK: #[[REFNONE_TY:.*]] = #llvm.di_derived_type +// CHECK: #llvm.di_subroutine_type<{{.*}}types = #[[NONE_TY]], #[[INT8_TY]]> +// CHECK: #llvm.di_subroutine_type<{{.*}}types = #[[REFNONE_TY]]> From c2d2b3b80866633b4db65c3841c40c16815267f3 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Fri, 25 Oct 2024 18:43:40 +0800 Subject: [PATCH 005/425] [test] Avoid writing to a potentially 
write-protected dir (#113674) --- clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c | 2 +- clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c index 944033724a6a2bb..a4d887f0be41646 100644 --- a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c +++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -verify +// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -verify #include __m512i test_mm512_loadrs_epi8(const __m512i * __A) { diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c index 68608b0cbff09f7..2a7204e39b8300e 100644 --- a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c +++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -verify +// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -verify #include __m128i test_mm_loadrs_epi8(const __m128i * __A) { From 2d26ef09fc87472cd42ea219c8f9267599872958 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 25 Oct 2024 12:06:57 +0100 Subject: [PATCH 006/425] [lldb-dap][test] Set disableASLR to False for tests (#113593) When running in constrained environments like docker, disabling ASLR might fail with errors like: ``` AssertionError: False is not true : launch failed 
(Cannot launch '/__w/.../lldb-dap/stackTrace/subtleFrames/TestDAP_subtleFrames.test_subtleFrames/a.out': personality set failed: Operation not permitted) ``` E.g., https://github.com/llvm/llvm-project/pull/110303 Hence we already run `settings set target.disable-aslr false` as part of the init-commands for the non-DAP tests (see https://github.com/llvm/llvm-project/pull/88312 and https://discourse.llvm.org/t/running-lldb-in-a-container/76801). But we never adjusted it for the DAP tests. As a result we get conflicting test logs like: ``` { "arguments": { "commandEscapePrefix": null, "disableASLR": true, .... "initCommands": [ ... "settings set target.disable-aslr false", ``` Disabling ASLR by default in tests isn't useful (it's only really a debugging aid for users). So this patch sets `disableASLR=False` by default. --- .../Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 7e80912be446423..a25466f07fa557f 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -367,7 +367,7 @@ def launch( cwd=None, env=None, stopOnEntry=False, - disableASLR=True, + disableASLR=False, disableSTDIO=False, shellExpandArguments=False, trace=False, @@ -451,7 +451,7 @@ def build_and_launch( cwd=None, env=None, stopOnEntry=False, - disableASLR=True, + disableASLR=False, disableSTDIO=False, shellExpandArguments=False, trace=False, From 2c0b34852af4dc9964f8bf6db303bd54a32856e7 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Fri, 25 Oct 2024 04:24:50 -0700 Subject: [PATCH 007/425] [RISCV] Mark pointer masking extensions as non-experimental (#113618) These extensions were ratified very recently. 
I've ensured we have definitions for all extensions in the document . There are no additional CSRs. --- .../Driver/print-supported-extensions-riscv.c | 10 +++++----- clang/test/Driver/riscv-profiles.c | 2 +- .../test/Preprocessor/riscv-target-features.c | 16 +++++++-------- llvm/docs/RISCVUsage.rst | 8 +++++--- llvm/docs/ReleaseNotes.md | 2 ++ llvm/lib/Target/RISCV/RISCVFeatures.td | 20 +++++++++---------- llvm/test/CodeGen/RISCV/attributes.ll | 20 +++++++++---------- llvm/test/CodeGen/RISCV/rvv/pr107950.ll | 2 +- .../TargetParser/RISCVISAInfoTest.cpp | 10 +++++----- 9 files changed, 47 insertions(+), 43 deletions(-) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index 65375b79cb680dc..342d6e921a5a83e 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -120,6 +120,8 @@ // CHECK-NEXT: smcdeleg 1.0 'Smcdeleg' (Counter Delegation Machine Level) // CHECK-NEXT: smcsrind 1.0 'Smcsrind' (Indirect CSR Access Machine Level) // CHECK-NEXT: smepmp 1.0 'Smepmp' (Enhanced Physical Memory Protection) +// CHECK-NEXT: smmpm 1.0 'Smmpm' (Machine-level Pointer Masking for M-mode) +// CHECK-NEXT: smnpm 1.0 'Smnpm' (Machine-level Pointer Masking for next lower privilege mode) // CHECK-NEXT: smrnmi 1.0 'Smrnmi' (Resumable Non-Maskable Interrupts) // CHECK-NEXT: smstateen 1.0 'Smstateen' (Machine-mode view of the state-enable extension) // CHECK-NEXT: ssaia 1.0 'Ssaia' (Advanced Interrupt Architecture Supervisor Level) @@ -128,6 +130,8 @@ // CHECK-NEXT: sscofpmf 1.0 'Sscofpmf' (Count Overflow and Mode-Based Filtering) // CHECK-NEXT: sscounterenw 1.0 'Sscounterenw' (Support writeable scounteren enable bit for any hpmcounter that is not read-only zero) // CHECK-NEXT: sscsrind 1.0 'Sscsrind' (Indirect CSR Access Supervisor Level) +// CHECK-NEXT: ssnpm 1.0 'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode) +// CHECK-NEXT: 
sspm 1.0 'Sspm' (Indicates Supervisor-mode Pointer Masking) // CHECK-NEXT: ssqosid 1.0 'Ssqosid' (Quality-of-Service (QoS) Identifiers) // CHECK-NEXT: ssstateen 1.0 'Ssstateen' (Supervisor-mode view of the state-enable extension) // CHECK-NEXT: ssstrict 1.0 'Ssstrict' (No non-conforming extensions are present) @@ -135,6 +139,7 @@ // CHECK-NEXT: sstvala 1.0 'Sstvala' (stval provides all needed values) // CHECK-NEXT: sstvecd 1.0 'Sstvecd' (stvec supports Direct mode) // CHECK-NEXT: ssu64xl 1.0 'Ssu64xl' (UXLEN=64 supported) +// CHECK-NEXT: supm 1.0 'Supm' (Indicates User-mode Pointer Masking) // CHECK-NEXT: svade 1.0 'Svade' (Raise exceptions on improper A/D bits) // CHECK-NEXT: svadu 1.0 'Svadu' (Hardware A/D updates) // CHECK-NEXT: svbare 1.0 'Svbare' $(satp mode Bare supported) @@ -177,12 +182,7 @@ // CHECK-NEXT: zvbc32e 0.7 'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements) // CHECK-NEXT: zvkgs 0.7 'Zvkgs' (Vector-Scalar GCM instructions for Cryptography) // CHECK-NEXT: smctr 1.0 'Smctr' (Control Transfer Records Machine Level) -// CHECK-NEXT: smmpm 1.0 'Smmpm' (Machine-level Pointer Masking for M-mode) -// CHECK-NEXT: smnpm 1.0 'Smnpm' (Machine-level Pointer Masking for next lower privilege mode) // CHECK-NEXT: ssctr 1.0 'Ssctr' (Control Transfer Records Supervisor Level) -// CHECK-NEXT: ssnpm 1.0 'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode) -// CHECK-NEXT: sspm 1.0 'Sspm' (Indicates Supervisor-mode Pointer Masking) -// CHECK-NEXT: supm 1.0 'Supm' (Indicates User-mode Pointer Masking) // CHECK-EMPTY: // CHECK-NEXT: Supported Profiles // CHECK-NEXT: rva20s64 diff --git a/clang/test/Driver/riscv-profiles.c b/clang/test/Driver/riscv-profiles.c index 55aa5b398cee98d..42e23cf57c880f0 100644 --- a/clang/test/Driver/riscv-profiles.c +++ b/clang/test/Driver/riscv-profiles.c @@ -195,7 +195,7 @@ // RVA23S64: "-target-feature" "+ssccptr" // RVA23S64: "-target-feature" "+sscofpmf" // RVA23S64: "-target-feature" "+sscounterenw" -// 
RVA23S64: "-target-feature" "+experimental-ssnpm" +// RVA23S64: "-target-feature" "+ssnpm" // RVA23S64: "-target-feature" "+ssstateen" // RVA23S64: "-target-feature" "+sstc" // RVA23S64: "-target-feature" "+sstvala" diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 9e986f0143aefab..98ad564d2b84084 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -1719,10 +1719,10 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-ZICFISS-EXT %s // CHECK-ZICFISS-EXT: __riscv_zicfiss 1000000{{$}} -// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: %clang --target=riscv32 \ // RUN: -march=rv32i_ssnpm1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SSNPM-EXT %s -// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 \ // RUN: -march=rv64i_ssnpm1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SSNPM-EXT %s // CHECK-SSNPM-EXT: __riscv_ssnpm 1000000{{$}} @@ -1735,26 +1735,26 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-SMNPM-EXT %s // CHECK-SMNPM-EXT: __riscv_smnpm 1000000{{$}} -// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: %clang --target=riscv32 \ // RUN: -march=rv32i_smmpm1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SMMPM-EXT %s -// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 \ // RUN: -march=rv64i_smmpm1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SMMPM-EXT %s // CHECK-SMMPM-EXT: __riscv_smmpm 1000000{{$}} -// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: %clang --target=riscv32 \ // RUN: -march=rv32i_sspm1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SSPM-EXT %s -// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 \ // RUN: -march=rv64i_sspm1p0 -E -dM 
%s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SSPM-EXT %s // CHECK-SSPM-EXT: __riscv_sspm 1000000{{$}} -// RUN: %clang --target=riscv32 -menable-experimental-extensions \ +// RUN: %clang --target=riscv32 \ // RUN: -march=rv32i_supm1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SUPM-EXT %s -// RUN: %clang --target=riscv64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 \ // RUN: -march=rv64i_supm1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SUPM-EXT %s // CHECK-SUPM-EXT: __riscv_supm 1000000{{$}} diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 2135ed3b1114180..ab58cdaa1b2f95d 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -129,6 +129,8 @@ on support follow. ``Smcdeleg`` Supported ``Smcsrind`` Supported ``Smepmp`` Supported + ``Smmpm`` Supported + ``Smnpm`` Supported ``Smrnmi`` Assembly Support ``Smstateen`` Assembly Support ``Ssaia`` Supported @@ -137,6 +139,8 @@ on support follow. ``Sscofpmf`` Assembly Support ``Sscounterenw`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) ``Sscsrind`` Supported + ``Ssnpm`` Supported + ``Sspm`` Supported ``Ssqosid`` Assembly Support ``Ssstateen`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) ``Ssstrict`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) @@ -144,6 +148,7 @@ on support follow. ``Sstvala`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) ``Sstvecd`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) ``Ssu64xl`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) + ``Supm`` Supported ``Svade`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) ``Svadu`` Assembly Support ``Svbare`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) @@ -308,9 +313,6 @@ LLVM supports (to various degrees) a number of experimental extensions. 
All exp The primary goal of experimental support is to assist in the process of ratification by providing an existence proof of an implementation, and simplifying efforts to validate the value of a proposed extension against large code bases. Experimental extensions are expected to either transition to ratified status, or be eventually removed. The decision on whether to accept an experimental extension is currently done on an entirely case by case basis; if you want to propose one, attending the bi-weekly RISC-V sync-up call is strongly advised. -``experimental-ssnpm``, ``experimental-smnpm``, ``experimental-smmpm``, ``experimental-sspm``, ``experimental-supm`` - LLVM implements the `v1.0.0-rc2 specification `__. - ``experimental-zalasr`` LLVM implements the `0.0.5 draft specification `__. diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 706546980cf6718..7cca9116a513451 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -171,6 +171,8 @@ Changes to the RISC-V Backend * Added `Smctr` and `Ssctr` extensions. * `-mcpu=syntacore-scr7` was added. * The `Zacas` extension is no longer marked as experimental. +* The `Smmpm`, `Smnpm`, `Ssnpm`, `Supm`, and `Sspm` pointer masking extensions + are no longer marked as experimental. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 7ccce2e1c70b226..778df542022f226 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1035,33 +1035,33 @@ def FeatureStdExtSvpbmt // privilege mode (U-mode), and for VS- and VU-modes if the H extension is // present. 
def FeatureStdExtSsnpm - : RISCVExperimentalExtension<"ssnpm", 1, 0, - "'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode)">; + : RISCVExtension<"ssnpm", 1, 0, + "'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode)">; // A machine-level extension that provides pointer masking for the next lower // privilege mode (S/HS if S-mode is implemented, or U-mode otherwise). def FeatureStdExtSmnpm - : RISCVExperimentalExtension<"smnpm", 1, 0, - "'Smnpm' (Machine-level Pointer Masking for next lower privilege mode)">; + : RISCVExtension<"smnpm", 1, 0, + "'Smnpm' (Machine-level Pointer Masking for next lower privilege mode)">; // A machine-level extension that provides pointer masking for M-mode. def FeatureStdExtSmmpm - : RISCVExperimentalExtension<"smmpm", 1, 0, - "'Smmpm' (Machine-level Pointer Masking for M-mode)">; + : RISCVExtension<"smmpm", 1, 0, + "'Smmpm' (Machine-level Pointer Masking for M-mode)">; // An extension that indicates that there is pointer-masking support available // in supervisor mode, with some facility provided in the supervisor execution // environment to control pointer masking. def FeatureStdExtSspm - : RISCVExperimentalExtension<"sspm", 1, 0, - "'Sspm' (Indicates Supervisor-mode Pointer Masking)">; + : RISCVExtension<"sspm", 1, 0, + "'Sspm' (Indicates Supervisor-mode Pointer Masking)">; // An extension that indicates that there is pointer-masking support available // in user mode, with some facility provided in the application execution // environment to control pointer masking. 
def FeatureStdExtSupm - : RISCVExperimentalExtension<"supm", 1, 0, - "'Supm' (Indicates User-mode Pointer Masking)">; + : RISCVExtension<"supm", 1, 0, + "'Supm' (Indicates User-mode Pointer Masking)">; def FeatureStdExtSmctr : RISCVExperimentalExtension<"smctr", 1, 0, diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index aa27d63bfa6262f..e9743d484f776f0 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -129,11 +129,11 @@ ; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha %s -o - | FileCheck --check-prefix=RV32ZABHA %s ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+experimental-zvbc32e %s -o - | FileCheck --check-prefix=RV32ZVBC32E %s ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+experimental-zvkgs %s -o - | FileCheck --check-prefix=RV32ZVKGS %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-ssnpm %s -o - | FileCheck --check-prefix=RV32SSNPM %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-smnpm %s -o - | FileCheck --check-prefix=RV32SMNPM %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-smmpm %s -o - | FileCheck --check-prefix=RV32SMMPM %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-sspm %s -o - | FileCheck --check-prefix=RV32SSPM %s -; RUN: llc -mtriple=riscv32 -mattr=+experimental-supm %s -o - | FileCheck --check-prefix=RV32SUPM %s +; RUN: llc -mtriple=riscv32 -mattr=+ssnpm %s -o - | FileCheck --check-prefix=RV32SSNPM %s +; RUN: llc -mtriple=riscv32 -mattr=+smnpm %s -o - | FileCheck --check-prefix=RV32SMNPM %s +; RUN: llc -mtriple=riscv32 -mattr=+smmpm %s -o - | FileCheck --check-prefix=RV32SMMPM %s +; RUN: llc -mtriple=riscv32 -mattr=+sspm %s -o - | FileCheck --check-prefix=RV32SSPM %s +; RUN: llc -mtriple=riscv32 -mattr=+supm %s -o - | FileCheck --check-prefix=RV32SUPM %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-smctr %s -o - | FileCheck --check-prefix=RV32SMCTR %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-ssctr %s -o - | FileCheck 
--check-prefix=RV32SSCTR %s @@ -272,11 +272,11 @@ ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha %s -o - | FileCheck --check-prefix=RV64ZABHA %s ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvbc32e %s -o - | FileCheck --check-prefix=RV64ZVBC32E %s ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvkgs %s -o - | FileCheck --check-prefix=RV64ZVKGS %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-ssnpm %s -o - | FileCheck --check-prefix=RV64SSNPM %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-smnpm %s -o - | FileCheck --check-prefix=RV64SMNPM %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-smmpm %s -o - | FileCheck --check-prefix=RV64SMMPM %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-sspm %s -o - | FileCheck --check-prefix=RV64SSPM %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-supm %s -o - | FileCheck --check-prefix=RV64SUPM %s +; RUN: llc -mtriple=riscv64 -mattr=+ssnpm %s -o - | FileCheck --check-prefix=RV64SSNPM %s +; RUN: llc -mtriple=riscv64 -mattr=+smnpm %s -o - | FileCheck --check-prefix=RV64SMNPM %s +; RUN: llc -mtriple=riscv64 -mattr=+smmpm %s -o - | FileCheck --check-prefix=RV64SMMPM %s +; RUN: llc -mtriple=riscv64 -mattr=+sspm %s -o - | FileCheck --check-prefix=RV64SSPM %s +; RUN: llc -mtriple=riscv64 -mattr=+supm %s -o - | FileCheck --check-prefix=RV64SUPM %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-smctr %s -o - | FileCheck --check-prefix=RV64SMCTR %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-ssctr %s -o - | FileCheck --check-prefix=RV64SSCTR %s diff --git a/llvm/test/CodeGen/RISCV/rvv/pr107950.ll b/llvm/test/CodeGen/RISCV/rvv/pr107950.ll index 8384008c245fc25..9d93ed3172132fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr107950.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr107950.ll @@ -26,5 +26,5 @@ entry: ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read) declare @llvm.masked.gather.nxv4i32.nxv4p0(, i32 immarg, , ) #1 -attributes #0 = { 
"target-features"="+64bit,+d,+f,+relax,+v,+xsifivecdiscarddlone,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-a,-b,-c,-e,-experimental-smctr,-experimental-smmpm,-experimental-smnpm,-experimental-ssctr,-experimental-ssnpm,-experimental-sspm,-experimental-supm,-experimental-zacas,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-m,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zifencei,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } +attributes #0 = { 
"target-features"="+64bit,+d,+f,+relax,+v,+xsifivecdiscarddlone,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-a,-b,-c,-e,-experimental-smctr,-smmpm,-smnpm,-experimental-ssctr,-ssnpm,-sspm,-supm,-experimental-zacas,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-m,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zifencei,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(read) } diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index ded43a4ff7875ad..2e9c548ae872f39 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -1065,6 +1065,8 @@ R"(All available -march extensions for RISC-V 
smcdeleg 1.0 smcsrind 1.0 smepmp 1.0 + smmpm 1.0 + smnpm 1.0 smrnmi 1.0 smstateen 1.0 ssaia 1.0 @@ -1073,6 +1075,8 @@ R"(All available -march extensions for RISC-V sscofpmf 1.0 sscounterenw 1.0 sscsrind 1.0 + ssnpm 1.0 + sspm 1.0 ssqosid 1.0 ssstateen 1.0 ssstrict 1.0 @@ -1080,6 +1084,7 @@ R"(All available -march extensions for RISC-V sstvala 1.0 sstvecd 1.0 ssu64xl 1.0 + supm 1.0 svade 1.0 svadu 1.0 svbare 1.0 @@ -1122,12 +1127,7 @@ Experimental extensions zvbc32e 0.7 zvkgs 0.7 smctr 1.0 - smmpm 1.0 - smnpm 1.0 ssctr 1.0 - ssnpm 1.0 - sspm 1.0 - supm 1.0 Supported Profiles rva20s64 From 90cdc03e7f5bda2e31573d48450a8ac8fa856efa Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 25 Oct 2024 12:56:10 +0100 Subject: [PATCH 008/425] [IR] Fix undiagnosed cases of structs containing scalable vectors (#113455) Type::isScalableTy and StructType::containsScalableVectorType failed to detect some cases of structs containing scalable vectors because containsScalableVectorType did not call back into isScalableTy to check the element types. Fix this, which requires sharing the same Visited set in both functions. Also change the external API so that callers are never required to pass in a Visited set, and normalize the naming to isScalableTy. --- llvm/include/llvm/IR/DerivedTypes.h | 4 +-- llvm/include/llvm/IR/Type.h | 1 + llvm/lib/AsmParser/LLParser.cpp | 2 +- llvm/lib/IR/Type.cpp | 29 ++++++++----------- llvm/lib/IR/Verifier.cpp | 3 +- .../InstCombine/InstructionCombining.cpp | 2 +- llvm/test/Verifier/scalable-global-vars.ll | 14 +++++++++ 7 files changed, 32 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index a24801d8bdf834f..820b5c0707df6cf 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -290,8 +290,8 @@ class StructType : public Type { bool isSized(SmallPtrSetImpl *Visited = nullptr) const; /// Returns true if this struct contains a scalable vector. 
- bool - containsScalableVectorType(SmallPtrSetImpl *Visited = nullptr) const; + bool isScalableTy(SmallPtrSetImpl &Visited) const; + using Type::isScalableTy; /// Returns true if this struct contains homogeneous scalable vector types. /// Note that the definition of homogeneous scalable vector type is not diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h index 2f53197df199986..d563b25d600a0c3 100644 --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -206,6 +206,7 @@ class Type { bool isScalableTargetExtTy() const; /// Return true if this is a type whose size is a known multiple of vscale. + bool isScalableTy(SmallPtrSetImpl &Visited) const; bool isScalableTy() const; /// Return true if this is a FP type or a vector of FP. diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 6a2372c97514087..8ddb2efb0e26c24 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -8525,7 +8525,7 @@ int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { return error(Loc, "base element of getelementptr must be sized"); auto *STy = dyn_cast(Ty); - if (STy && STy->containsScalableVectorType()) + if (STy && STy->isScalableTy()) return error(Loc, "getelementptr cannot target structure that contains " "scalable vector type"); diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index f618263f79c3133..912b1a3960ef196 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -58,16 +58,19 @@ bool Type::isIntegerTy(unsigned Bitwidth) const { return isIntegerTy() && cast(this)->getBitWidth() == Bitwidth; } -bool Type::isScalableTy() const { +bool Type::isScalableTy(SmallPtrSetImpl &Visited) const { if (const auto *ATy = dyn_cast(this)) - return ATy->getElementType()->isScalableTy(); - if (const auto *STy = dyn_cast(this)) { - SmallPtrSet Visited; - return STy->containsScalableVectorType(&Visited); - } + return ATy->getElementType()->isScalableTy(Visited); + if 
(const auto *STy = dyn_cast(this)) + return STy->isScalableTy(Visited); return getTypeID() == ScalableVectorTyID || isScalableTargetExtTy(); } +bool Type::isScalableTy() const { + SmallPtrSet Visited; + return isScalableTy(Visited); +} + const fltSemantics &Type::getFltSemantics() const { switch (getTypeID()) { case HalfTyID: return APFloat::IEEEhalf(); @@ -394,30 +397,22 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef ETypes, return ST; } -bool StructType::containsScalableVectorType( - SmallPtrSetImpl *Visited) const { +bool StructType::isScalableTy(SmallPtrSetImpl &Visited) const { if ((getSubclassData() & SCDB_ContainsScalableVector) != 0) return true; if ((getSubclassData() & SCDB_NotContainsScalableVector) != 0) return false; - if (Visited && !Visited->insert(const_cast(this)).second) + if (!Visited.insert(this).second) return false; for (Type *Ty : elements()) { - if (isa(Ty)) { + if (Ty->isScalableTy(Visited)) { const_cast(this)->setSubclassData( getSubclassData() | SCDB_ContainsScalableVector); return true; } - if (auto *STy = dyn_cast(Ty)) { - if (STy->containsScalableVectorType(Visited)) { - const_cast(this)->setSubclassData( - getSubclassData() | SCDB_ContainsScalableVector); - return true; - } - } } // For structures that are opaque, return false but do not set the diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index f34fe7594c8602c..60e65392218dadf 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4107,8 +4107,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Check(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); if (auto *STy = dyn_cast(GEP.getSourceElementType())) { - SmallPtrSet Visited; - Check(!STy->containsScalableVectorType(&Visited), + Check(!STy->isScalableTy(), "getelementptr cannot target structure that contains scalable vector" "type", &GEP); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp 
b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c8b9f166b160205..971ace2a4f4716f 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -4087,7 +4087,7 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) { if (LoadInst *L = dyn_cast(Agg)) { // Bail out if the aggregate contains scalable vector type if (auto *STy = dyn_cast(Agg->getType()); - STy && STy->containsScalableVectorType()) + STy && STy->isScalableTy()) return nullptr; // If the (non-volatile) load only has one use, we can rewrite this to a diff --git a/llvm/test/Verifier/scalable-global-vars.ll b/llvm/test/Verifier/scalable-global-vars.ll index 81882261e664ef6..fb9a3067acba982 100644 --- a/llvm/test/Verifier/scalable-global-vars.ll +++ b/llvm/test/Verifier/scalable-global-vars.ll @@ -15,3 +15,17 @@ ; CHECK-NEXT: ptr @ScalableVecStructGlobal @ScalableVecStructGlobal = global { i32, } zeroinitializer +; CHECK-NEXT: Globals cannot contain scalable types +; CHECK-NEXT: ptr @StructTestGlobal +%struct.test = type { , } +@StructTestGlobal = global %struct.test zeroinitializer + +; CHECK-NEXT: Globals cannot contain scalable types +; CHECK-NEXT: ptr @StructArrayTestGlobal +%struct.array.test = type { [2 x ] } +@StructArrayTestGlobal = global %struct.array.test zeroinitializer + +; CHECK-NEXT: Globals cannot contain scalable types +; CHECK-NEXT: ptr @StructTargetTestGlobal +%struct.target.test = type { target("aarch64.svcount"), target("aarch64.svcount") } +@StructTargetTestGlobal = global %struct.target.test zeroinitializer From 403e4a2074910fcec139a8fbb77f3c2d15643916 Mon Sep 17 00:00:00 2001 From: SpencerAbson Date: Fri, 25 Oct 2024 13:03:07 +0100 Subject: [PATCH 009/425] [AArch64] Add assembly/disassembly for SVE zeroing int-float conversions (#113605) This patch adds assembly/disassembly for the following predicated SVE2.2 instructions - SCVTF (zeroing) - UCVTF (zeroing) - FCVTZS 
(zeroing) - FCVTZU (zeroing) - FLOGB (zeroing) - In accordance with: https://developer.arm.com/documentation/ddi0602/latest/ --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 9 +++ llvm/lib/Target/AArch64/SVEInstrFormats.td | 26 ++++++++ llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s | 2 +- .../MC/AArch64/SVE2p2/fcvtzs_z-diagnostics.s | 34 ++++++++++ llvm/test/MC/AArch64/SVE2p2/fcvtzs_z.s | 63 +++++++++++++++++++ .../MC/AArch64/SVE2p2/fcvtzu_z-diagnostics.s | 34 ++++++++++ llvm/test/MC/AArch64/SVE2p2/fcvtzu_z.s | 63 +++++++++++++++++++ .../MC/AArch64/SVE2p2/flogb_z-diagnostics.s | 47 ++++++++++++++ llvm/test/MC/AArch64/SVE2p2/flogb_z.s | 33 ++++++++++ .../MC/AArch64/SVE2p2/scvtf_z-diagnostics.s | 34 ++++++++++ llvm/test/MC/AArch64/SVE2p2/scvtf_z.s | 63 +++++++++++++++++++ .../MC/AArch64/SVE2p2/ucvtf_z-diagnostics.s | 34 ++++++++++ llvm/test/MC/AArch64/SVE2p2/ucvtf_z.s | 63 +++++++++++++++++++ 13 files changed, 504 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/AArch64/SVE2p2/fcvtzs_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/fcvtzs_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/fcvtzu_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/fcvtzu_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/flogb_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/flogb_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/scvtf_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/scvtf_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/ucvtf_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/ucvtf_z.s diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 59859cb7442d59d..bf6e25438633c7d 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4227,6 +4227,15 @@ let Predicates = [HasSVE2p2orSME2p2] in { // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate 
def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>; + // Floating-point convert to integer, zeroing predicate + defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs">; + defm FCVTZU_ZPzZ : sve_fp_z2op_p_zd_d<0b1, "fcvtzu">; + // Integer convert to floating-point, zeroing predicate + defm SCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b0, "scvtf">; + defm UCVTF_ZPzZ : sve_fp_z2op_p_zd_c<0b1, "ucvtf">; + // Signed integer base 2 logarithm of fp value, zeroing predicate + defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">; + // Floating point round to integral fp value in integer size range // Merging defm FRINT32Z_ZPmZ : sve_fp_2op_p_zd_frint<0b00, "frint32z">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index fc2e889d3a1a038..af5c96eb5c8c561 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3181,6 +3181,32 @@ multiclass sve_fp_z2op_p_zd_frint opc, string asm> { def _D : sve_fp_z2op_p_zd<{ 0b0010, opc{1}, 1, opc{0} }, asm, ZPR64, ZPR64>; } +multiclass sve_fp_z2op_p_zd_d { + def _HtoH : sve_fp_z2op_p_zd<{ 0b011101, U }, asm, ZPR16, ZPR16>; + def _HtoS : sve_fp_z2op_p_zd<{ 0b011110, U }, asm, ZPR16, ZPR32>; + def _HtoD : sve_fp_z2op_p_zd<{ 0b011111, U }, asm, ZPR16, ZPR64>; + def _StoS : sve_fp_z2op_p_zd<{ 0b101110, U }, asm, ZPR32, ZPR32>; + def _StoD : sve_fp_z2op_p_zd<{ 0b111110, U }, asm, ZPR32, ZPR64>; + def _DtoS : sve_fp_z2op_p_zd<{ 0b111100, U }, asm, ZPR64, ZPR32>; + def _DtoD : sve_fp_z2op_p_zd<{ 0b111111, U }, asm, ZPR64, ZPR64>; +} + +multiclass sve_fp_z2op_p_zd_c { + def _HtoH : sve_fp_z2op_p_zd<{ 0b011001, U }, asm, ZPR16, ZPR16>; + def _StoH : sve_fp_z2op_p_zd<{ 0b011010, U }, asm, ZPR32, ZPR16>; + def _StoS : sve_fp_z2op_p_zd<{ 0b101010, U }, asm, ZPR32, ZPR32>; + def _StoD : sve_fp_z2op_p_zd<{ 0b111000, U }, asm, ZPR32, ZPR64>; + def _DtoS : sve_fp_z2op_p_zd<{ 0b111010, U }, asm, ZPR64, ZPR32>; + def _DtoH : sve_fp_z2op_p_zd<{ 0b011011, U 
}, asm, ZPR64, ZPR16>; + def _DtoD : sve_fp_z2op_p_zd<{ 0b111011, U }, asm, ZPR64, ZPR64>; +} + +multiclass sve_fp_z2op_p_zd_d_flogb { + def _H : sve_fp_z2op_p_zd<0b0011001, asm, ZPR16, ZPR16>; + def _S : sve_fp_z2op_p_zd<0b0011010, asm, ZPR32, ZPR32>; + def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>; +} + //===----------------------------------------------------------------------===// // SVE Integer Arithmetic - Binary Predicated Group //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s index ddb8c4ff35b6a31..60d9f9e5e242d58 100644 --- a/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s @@ -14,7 +14,7 @@ flogb z0.b, p0/m, z0.b // Invalid predicate operation flogb z0.s, p0/z, z0.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: flogb z0.s, p0/z, z0.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z-diagnostics.s new file mode 100644 index 000000000000000..1408cba4070bf4b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z-diagnostics.s @@ -0,0 +1,34 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +fcvtzs z0.h, p0/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtzs z0.h, p0/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtzs z0.h, p0/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtzs z0.h, p0/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +fcvtzs z0.h, p8/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid 
restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: fcvtzs z0.h, p8/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.d, p0/z, z7.d +fcvtzs z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fcvtzs z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +fcvtzs z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fcvtzs z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z.s b/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z.s new file mode 100644 index 000000000000000..a37f83c0f97b7ca --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// convert from half + +fcvtzs z0.h, p0/z, z0.h // 01100100-01011110-11000000-00000000 +// CHECK-INST: fcvtzs z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xc0,0x5e,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645ec000 + +fcvtzs z23.s, p3/z, z13.h // 01100100-01011111-10001101-10110111 +// CHECK-INST: fcvtzs z23.s, p3/z, z13.h +// CHECK-ENCODING: [0xb7,0x8d,0x5f,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645f8db7 + +fcvtzs z31.d, p7/z, z31.h // 01100100-01011111-11011111-11111111 +// CHECK-INST: fcvtzs z31.d, p7/z, z31.h +// CHECK-ENCODING: [0xff,0xdf,0x5f,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645fdfff + +// convert from single + +fcvtzs z0.s, p0/z, z0.s // 01100100-10011111-10000000-00000000 +// CHECK-INST: fcvtzs z0.s, p0/z, z0.s +// CHECK-ENCODING: [0x00,0x80,0x9f,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 649f8000 + +fcvtzs z21.d, p5/z, z10.s // 01100100-11011111-10010101-01010101 +// CHECK-INST: fcvtzs z21.d, p5/z, z10.s +// CHECK-ENCODING: [0x55,0x95,0xdf,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64df9555 + +// convert from double + +fcvtzs z23.s, p3/z, z13.d // 01100100-11011110-10001101-10110111 +// CHECK-INST: fcvtzs z23.s, p3/z, z13.d +// CHECK-ENCODING: [0xb7,0x8d,0xde,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64de8db7 + +fcvtzs z31.d, p7/z, z31.d // 01100100-11011111-11011111-11111111 +// CHECK-INST: fcvtzs z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xdf,0xdf,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dfdfff diff 
--git a/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z-diagnostics.s new file mode 100644 index 000000000000000..fc4ecda82bd2007 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z-diagnostics.s @@ -0,0 +1,34 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +fcvtzu z0.h, p0/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtzu z0.h, p0/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtzu z0.h, p0/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtzu z0.h, p0/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +fcvtzu z0.h, p8/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: fcvtzu z0.h, p8/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.d, p0/z, z7.d +fcvtzu z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fcvtzu z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +fcvtzu z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fcvtzu z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z.s b/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z.s new file mode 100644 index 000000000000000..df1ac4016689b78 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// convert from half + +fcvtzu z0.h, p0/z, z0.h // 01100100-01011110-11100000-00000000 +// CHECK-INST: fcvtzu z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xe0,0x5e,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645ee000 + +fcvtzu z21.s, p5/z, z10.h // 01100100-01011111-10110101-01010101 +// CHECK-INST: fcvtzu z21.s, p5/z, z10.h +// CHECK-ENCODING: [0x55,0xb5,0x5f,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645fb555 + +fcvtzu z23.d, p3/z, z13.h // 01100100-01011111-11101101-10110111 +// CHECK-INST: fcvtzu z23.d, p3/z, z13.h +// CHECK-ENCODING: [0xb7,0xed,0x5f,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645fedb7 + +// convert from single + +fcvtzu z21.s, p5/z, z10.s // 01100100-10011111-10110101-01010101 +// CHECK-INST: fcvtzu z21.s, p5/z, z10.s +// CHECK-ENCODING: [0x55,0xb5,0x9f,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 
649fb555 + +fcvtzu z31.d, p7/z, z31.s // 01100100-11011111-10111111-11111111 +// CHECK-INST: fcvtzu z31.d, p7/z, z31.s +// CHECK-ENCODING: [0xff,0xbf,0xdf,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dfbfff + +// convert from double + +fcvtzu z0.s, p0/z, z0.d // 01100100-11011110-10100000-00000000 +// CHECK-INST: fcvtzu z0.s, p0/z, z0.d +// CHECK-ENCODING: [0x00,0xa0,0xde,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dea000 + +fcvtzu z31.d, p7/z, z31.d // 01100100-11011111-11111111-11111111 +// CHECK-INST: fcvtzu z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xff,0xdf,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dfffff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/flogb_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/flogb_z-diagnostics.s new file mode 100644 index 000000000000000..8fd528e1fc05d4a --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/flogb_z-diagnostics.s @@ -0,0 +1,47 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid element width + +flogb z0.b, p0/z, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: flogb z0.b, p0/z, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +flogb z0.h, p0/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: flogb z0.h, p0/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +flogb z0.s, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: flogb z0.s, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +flogb z0.d, p0/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: flogb z0.d, p0/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid 
predicate + +flogb z0.h, p8/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: flogb z0.h, p8/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.d, p0/z, z7.d +flogb z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: flogb z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +flogb z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: flogb z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/flogb_z.s b/llvm/test/MC/AArch64/SVE2p2/flogb_z.s new file mode 100644 index 000000000000000..1b056aa928ce234 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/flogb_z.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +flogb z0.h, p0/z, z0.h // 01100100-00011110-10100000-00000000 +// CHECK-INST: flogb z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xa0,0x1e,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 641ea000 + +flogb z23.s, p3/z, z13.s // 01100100-00011110-11001101-10110111 +// CHECK-INST: flogb z23.s, p3/z, z13.s +// CHECK-ENCODING: [0xb7,0xcd,0x1e,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 641ecdb7 + +flogb z31.d, p7/z, z31.d // 01100100-00011110-11111111-11111111 +// CHECK-INST: flogb z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xff,0x1e,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 641effff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/scvtf_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/scvtf_z-diagnostics.s new file mode 100644 index 000000000000000..9dd089271462596 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/scvtf_z-diagnostics.s @@ -0,0 +1,34 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +scvtf z0.s, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: scvtf z0.s, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +scvtf z0.d, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: scvtf z0.d, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +scvtf z0.h, p8/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: scvtf z0.h, p8/z, z0.h +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.d, p0/z, z7.d +scvtf z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: scvtf z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +scvtf z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: scvtf z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/scvtf_z.s b/llvm/test/MC/AArch64/SVE2p2/scvtf_z.s new file mode 100644 index 000000000000000..b8898c6485f6199 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/scvtf_z.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// convert to half + +scvtf z0.h, p0/z, z0.h // 01100100-01011100-11000000-00000000 +// CHECK-INST: scvtf z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xc0,0x5c,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645cc000 + +scvtf z21.h, p5/z, z10.s // 01100100-01011101-10010101-01010101 +// CHECK-INST: scvtf z21.h, p5/z, z10.s +// CHECK-ENCODING: [0x55,0x95,0x5d,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645d9555 + +scvtf z31.h, p7/z, z31.d // 01100100-01011101-11011111-11111111 +// CHECK-INST: scvtf z31.h, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xdf,0x5d,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645ddfff + +// convert to single + +scvtf z0.s, p0/z, z0.s // 01100100-10011101-10000000-00000000 +// CHECK-INST: scvtf z0.s, p0/z, z0.s +// CHECK-ENCODING: [0x00,0x80,0x9d,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 649d8000 + +scvtf z23.s, p3/z, z13.d // 01100100-11011101-10001101-10110111 +// CHECK-INST: scvtf z23.s, p3/z, z13.d +// CHECK-ENCODING: [0xb7,0x8d,0xdd,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dd8db7 + +// convert to double + +scvtf z21.d, p5/z, z10.s // 01100100-11011100-10010101-01010101 +// CHECK-INST: scvtf z21.d, p5/z, z10.s +// CHECK-ENCODING: [0x55,0x95,0xdc,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dc9555 + +scvtf z31.d, p7/z, z31.d // 01100100-11011101-11011111-11111111 +// CHECK-INST: scvtf z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xdf,0xdd,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dddfff \ No newline at end of file 
diff --git a/llvm/test/MC/AArch64/SVE2p2/ucvtf_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/ucvtf_z-diagnostics.s new file mode 100644 index 000000000000000..1317428dafde03b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/ucvtf_z-diagnostics.s @@ -0,0 +1,34 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +ucvtf z0.s, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: ucvtf z0.s, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ucvtf z0.d, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: ucvtf z0.d, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +ucvtf z0.h, p8/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ucvtf z0.h, p8/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.d, p0/z, z7.d +ucvtf z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: ucvtf z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +ucvtf z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: ucvtf z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/ucvtf_z.s b/llvm/test/MC/AArch64/SVE2p2/ucvtf_z.s new file mode 100644 index 000000000000000..9e87afd60b4055b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/ucvtf_z.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | 
FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// convert to half + +ucvtf z0.h, p0/z, z0.h // 01100100-01011100-11100000-00000000 +// CHECK-INST: ucvtf z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xe0,0x5c,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645ce000 + +ucvtf z21.h, p5/z, z10.s // 01100100-01011101-10110101-01010101 +// CHECK-INST: ucvtf z21.h, p5/z, z10.s +// CHECK-ENCODING: [0x55,0xb5,0x5d,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645db555 + +ucvtf z31.h, p7/z, z31.d // 01100100-01011101-11111111-11111111 +// CHECK-INST: ucvtf z31.h, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xff,0x5d,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 645dffff + +// convert to single + +ucvtf z23.s, p3/z, z13.s // 01100100-10011101-10101101-10110111 +// CHECK-INST: ucvtf z23.s, p3/z, z13.s +// CHECK-ENCODING: [0xb7,0xad,0x9d,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 649dadb7 + +ucvtf z21.s, p5/z, z10.d // 
01100100-11011101-10110101-01010101 +// CHECK-INST: ucvtf z21.s, p5/z, z10.d +// CHECK-ENCODING: [0x55,0xb5,0xdd,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64ddb555 + +// convert to double + +ucvtf z0.d, p0/z, z0.s // 01100100-11011100-10100000-00000000 +// CHECK-INST: ucvtf z0.d, p0/z, z0.s +// CHECK-ENCODING: [0x00,0xa0,0xdc,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dca000 + +ucvtf z31.d, p7/z, z31.d // 01100100-11011101-11111111-11111111 +// CHECK-INST: ucvtf z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xff,0xdd,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64ddffff \ No newline at end of file From 183ddefff60f560466ec3bf271c6008067d746db Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Fri, 25 Oct 2024 15:06:32 +0300 Subject: [PATCH 010/425] [MC][ELF] Have an assert catching creating group sections without signatures. (#113553) --- llvm/include/llvm/MC/MCSectionELF.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h index d43ffbd885c961e..f09d30591a3cf60 100644 --- a/llvm/include/llvm/MC/MCSectionELF.h +++ b/llvm/include/llvm/MC/MCSectionELF.h @@ -62,6 +62,8 @@ class MCSectionELF final : public MCSection { type == ELF::SHT_NOBITS, Begin), Type(type), Flags(flags), UniqueID(UniqueID), EntrySize(entrySize), Group(group, IsComdat), LinkedToSym(LinkedToSym) { + assert((!(Flags & ELF::SHF_GROUP) || Group.getPointer()) && + "Group section without signature!"); if (Group.getPointer()) Group.getPointer()->setIsSignature(); } From 43dda4bbeb59c9b327b9b5b1fe917a7d8a5bf3bc Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 25 Oct 2024 13:08:39 +0100 Subject: [PATCH 011/425] [clang][doc] Add release note for changes to `-fveclib={ArmPL,SLEEF}` (#113673) Changed in #112580. 
--- clang/docs/ReleaseNotes.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ed0c0e369fca74c..9e1558d8acc99f4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -302,6 +302,11 @@ Modified Compiler Flags the ``promoted`` algorithm for complex division when possible rather than the less basic (limited range) algorithm. +- The ``-fveclib`` option has been updated to enable ``-fno-math-errno`` for + ``-fveclib=ArmPL`` and ``-fveclib=SLEEF``. This gives Clang more opportunities + to utilize these vector libraries. The behavior for all other vector function + libraries remains unchanged. + Removed Compiler Flags ------------------------- From 6854ad90e39e9d119c990043f573db7157a2b097 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 25 Oct 2024 14:22:11 +0200 Subject: [PATCH 012/425] [bazel][lldb] "Fix" the build after b852fb1ec5fa15f0b913cc4988cbd09239b19904 b852fb1ec5fa15f0b913cc4988cbd09239b19904 split out ValueObject, but it's still all a big pile of dependency spaghetti so just build it with Core. --- utils/bazel/llvm-project-overlay/lldb/BUILD.bazel | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel index 9dba2efc34f6139..91eb04db3ee9b53 100644 --- a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel @@ -607,7 +607,10 @@ gentbl_cc_library( cc_library( name = "CoreHeaders", - hdrs = glob(["include/lldb/Core/**/*.h"]), + hdrs = glob([ + "include/lldb/Core/**/*.h", + "include/lldb/ValueObject/**/*.h", # This should be its own library. 
+ ]), strip_include_prefix = "include", deps = [ ":BreakpointHeaders", @@ -627,8 +630,14 @@ cc_library( cc_library( name = "Core", - srcs = glob(["source/Core/**/*.cpp"]), - hdrs = glob(["include/lldb/Core/**/*.h"]), + srcs = glob([ + "source/Core/**/*.cpp", + "source/ValueObject/**/*.cpp", # This should be its own library. + ]), + hdrs = glob([ + "include/lldb/Core/**/*.h", + "include/lldb/ValueObject/**/*.h", # This should be its own library. + ]), strip_include_prefix = "include", deps = [ ":BreakpointHeaders", From 32baf2917357b4e5a566ff43c6e0156e5ad10cb8 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 25 Oct 2024 14:48:38 +0200 Subject: [PATCH 013/425] Make NoopLattice.h self-contained --- clang/include/clang/Analysis/FlowSensitive/NoopLattice.h | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/include/clang/Analysis/FlowSensitive/NoopLattice.h b/clang/include/clang/Analysis/FlowSensitive/NoopLattice.h index 0938091cd689f02..96c695473b67a19 100644 --- a/clang/include/clang/Analysis/FlowSensitive/NoopLattice.h +++ b/clang/include/clang/Analysis/FlowSensitive/NoopLattice.h @@ -14,6 +14,7 @@ #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_NOOP_LATTICE_H #include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Support/Compiler.h" #include "llvm/ADT/Any.h" #include From 49940514e2c26ad82d86abee59ba4c2f6d8ec07d Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Fri, 25 Oct 2024 13:59:46 +0100 Subject: [PATCH 014/425] [CLANG][AArch64] Add the modal 8 bit floating-point scalar type (#97277) ARM ACLE PR#323[1] adds new modal types for 8-bit floating point intrinsic. From the PR#323: ``` ACLE defines the `__mfp8` type, which can be used for the E5M2 and E4M3 8-bit floating-point formats. It is a storage and interchange only type with no arithmetic operations other than intrinsic calls. ``` The type should be an opaque type and its format is undefined in Clang. It is only defined in the backend by a status/format register, for AArch64 the FPMR.
This patch is an attempt to add the mfloat8_t scalar type. It has a parser and codegen for the new scalar type. The patch lowers it to an 8-bit unsigned type, as it has no format. But maybe we should add another opaque type. [1] https://github.com/ARM-software/acle/pull/323 --- .../clang/Basic/AArch64SVEACLETypes.def | 2 +- .../include/clang/Serialization/ASTBitCodes.h | 2 +- clang/lib/CodeGen/CGDebugInfo.cpp | 7 ++ clang/test/AST/arm-mfp8.cpp | 91 +++++++++++++++++++ clang/test/CodeGen/aarch64-debug-types.c | 9 ++ clang/test/CodeGen/arm-mfp8.c | 34 +++++++ clang/test/Modules/no-external-type-id.cppm | 2 +- clang/test/Sema/arm-mfp8.c | 11 +++ clang/test/Sema/arm-mfp8.cpp | 34 ++++++- clang/utils/TableGen/NeonEmitter.cpp | 1 + 10 files changed, 189 insertions(+), 4 deletions(-) create mode 100644 clang/test/AST/arm-mfp8.cpp create mode 100644 clang/test/CodeGen/aarch64-debug-types.c create mode 100644 clang/test/Sema/arm-mfp8.c diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 25abf5f3f86b7d2..62f6087e9624662 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -107,7 +107,6 @@ AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId) #endif - //===- Vector point types -----------------------------------------------===// SVE_VECTOR_TYPE_INT("__SVInt8_t", "__SVInt8_t", SveInt8, SveInt8Ty, 16, 8, 1, true) @@ -201,6 +200,7 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4T SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy) +AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8_t", "__MFloat8_t", MFloat8, MFloat8Ty, 1, 8, 1) AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, MFloat8x8Ty, 8, 8, 1) AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloat8x16Ty, 16, 8, 1) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h
b/clang/include/clang/Serialization/ASTBitCodes.h index 13173dc96e71aed..99232fd21357904 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1149,7 +1149,7 @@ enum PredefinedTypeIDs { /// /// Type IDs for non-predefined types will start at /// NUM_PREDEF_TYPE_IDs. -const unsigned NUM_PREDEF_TYPE_IDS = 511; +const unsigned NUM_PREDEF_TYPE_IDS = 512; // Ensure we do not overrun the predefined types we reserved // in the enum PredefinedTypeIDs above. diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 27bbbfc6f531a10..59a761c2303c951 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -783,6 +783,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { #define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/AArch64SVEACLETypes.def" { + if (BT->getKind() == BuiltinType::MFloat8) { + Encoding = llvm::dwarf::DW_ATE_unsigned_char; + BTName = BT->getName(CGM.getLangOpts()); + // Bit size and offset of the type. + uint64_t Size = CGM.getContext().getTypeSize(BT); + return DBuilder.createBasicType(BTName, Size, Encoding); + } ASTContext::BuiltinVectorTypeInfo Info = // For svcount_t, only the lower 2 bytes are relevant. BT->getKind() == BuiltinType::SveCount diff --git a/clang/test/AST/arm-mfp8.cpp b/clang/test/AST/arm-mfp8.cpp new file mode 100644 index 000000000000000..a00d055f7d96794 --- /dev/null +++ b/clang/test/AST/arm-mfp8.cpp @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -std=c++11 -triple aarch64-arm-none-eabi -target-feature -fp8 -ast-dump %s | \ +// RUN: FileCheck %s --strict-whitespace + +// REQUIRES: aarch64-registered-target || arm-registered-target + +/* Various contexts where type __mfp8 can appear. 
*/ + +#include +/* Namespace */ +namespace { + __mfp8 f2n; + __mfp8 arr1n[10]; +} + +//CHECK: |-NamespaceDecl {{.*}} +//CHECK-NEXT: | |-VarDecl {{.*}} f2n '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | `-VarDecl {{.*}} arr1n '__mfp8[10]' + + + const __mfp8 func1n(const __mfp8 mfp8) { + // this should fail + __mfp8 f1n; + f1n = mfp8; + return f1n; + } +//CHECK: |-FunctionDecl {{.*}} func1n 'const __mfp8 (const __mfp8)' +//CHECK: | `-VarDecl {{.*}} f1n '__mfp8':'__MFloat8_t' +//CHECK-NEXT: |-BinaryOperator {{.*}} '__mfp8':'__MFloat8_t' lvalue '=' +//CHECK-NEXT: | |-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue Var {{.*}} 'f1n' '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | `-DeclRefExpr {{.*}} 'const __mfp8':'const __MFloat8_t' lvalue ParmVar {{.*}} 'mfp8' 'const __mfp8':'const __MFloat8_t' +//CHECK-NEXT: `-ReturnStmt {{.*}} +//CHECK-NEXT: `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' +//CHECK-NEXT: `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue Var {{.*}} 'f1n' '__mfp8':'__MFloat8_t' + + +/* Class */ + +class C1 { + __mfp8 f1c; + static const __mfp8 f2c; + volatile __MFloat8_t f3c; +public: + C1(__mfp8 arg) : f1c(arg), f3c(arg) { } + __mfp8 func1c(__mfp8 arg ) { + return arg; + } + static __mfp8 func2c(__mfp8 arg) { + return arg; + } +}; + +//CHECK: | |-CXXRecordDecl {{.*}} referenced class C1 +//CHECK-NEXT: | |-FieldDecl {{.*}} f1c '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | |-VarDecl {{.*}} f2c 'const __mfp8':'const __MFloat8_t' static +//CHECK-NEXT: | |-FieldDecl {{.*}} f3c 'volatile __MFloat8_t' +//CHECK-NEXT: | |-AccessSpecDecl {{.*}} +//CHECK-NEXT: | |-CXXConstructorDecl {{.*}} C1 'void (__mfp8)' implicit-inline +//CHECK-NEXT: | | |-ParmVarDecl {{.*}} arg '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | |-CXXCtorInitializer {{.*}} 'f1c' '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | | `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar 
{{.*}} 'arg' '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | |-CXXCtorInitializer {{.*}} 'f3c' 'volatile __MFloat8_t' +//CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | | `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}} 'arg' '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | `-CompoundStmt {{.*}} +//CHECK-NEXT: | |-CXXMethodDecl {{.*}} func1c '__mfp8 (__mfp8)' implicit-inline +//CHECK-NEXT: | | |-ParmVarDecl {{.*}} arg '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | `-CompoundStmt {{.*}} +//CHECK-NEXT: | | `-ReturnStmt {{.*}} +//CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}}8 'arg' '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | `-CXXMethodDecl {{.*}} func2c '__mfp8 (__mfp8)' static implicit-inline +//CHECK-NEXT: | |-ParmVarDecl {{.*}} arg '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | `-CompoundStmt {{.*}} +//CHECK-NEXT: | `-ReturnStmt {{.*}} +//CHECK-NEXT: | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}} 'arg' '__mfp8':'__MFloat8_t' + +template struct S1 { + C mem1; +}; + +template <> struct S1<__mfp8> { + __mfp8 mem2; +}; + +//CHECK: |-TemplateArgument type '__MFloat8_t' +//CHECK-NEXT: | `-BuiltinType {{.*}} '__MFloat8_t' +//CHECK-NEXT: |-CXXRecordDecl {{.*}} implicit struct S1 +//CHECK-NEXT: `-FieldDecl {{.*}} mem2 '__mfp8':'__MFloat8_t' diff --git a/clang/test/CodeGen/aarch64-debug-types.c b/clang/test/CodeGen/aarch64-debug-types.c new file mode 100644 index 000000000000000..c109610023ed49e --- /dev/null +++ b/clang/test/CodeGen/aarch64-debug-types.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 \ +// RUN: -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s +#include + +void test_locals(void) { + // CHECK-DAG: !DIDerivedType(tag: DW_TAG_typedef, name: "__MFloat8_t", {{.*}}, 
baseType: ![[ELTTYU8:[0-9]+]] + // CHECK-DAG: ![[ELTTYU8]] = !DIBasicType(name: "__MFloat8_t", size: 8, encoding: DW_ATE_unsigned_char) + __MFloat8_t mfp8; +} diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c index 35ec24c8a7880db..8c817fd5be1c9be 100644 --- a/clang/test/CodeGen/arm-mfp8.c +++ b/clang/test/CodeGen/arm-mfp8.c @@ -47,5 +47,39 @@ mfloat8x8_t test_ret_mfloat8x8_t(mfloat8x8_t v) { return v; } +// CHECK-C-LABEL: define dso_local <1 x i8> @func1n( +// CHECK-C-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { +// CHECK-C-NEXT: [[ENTRY:.*:]] +// CHECK-C-NEXT: [[MFP8_ADDR:%.*]] = alloca <1 x i8>, align 1 +// CHECK-C-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 +// CHECK-C-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[MFP8_ADDR]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: store <1 x i8> [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-C-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 +// CHECK-C-NEXT: [[TMP1:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1 +// CHECK-C-NEXT: ret <1 x i8> [[TMP1]] +// +// CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z6func1nu11__MFloat8_t( +// CHECK-CXX-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: [[MFP8_ADDR:%.*]] = alloca <1 x i8>, align 1 +// CHECK-CXX-NEXT: [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1 +// CHECK-CXX-NEXT: store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <1 x i8>, ptr [[MFP8_ADDR]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: store <1 x i8> [[TMP0]], ptr [[ARRAYIDX]], align 1 +// CHECK-CXX-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2 +// CHECK-CXX-NEXT: [[TMP1:%.*]] = 
load <1 x i8>, ptr [[ARRAYIDX1]], align 1 +// CHECK-CXX-NEXT: ret <1 x i8> [[TMP1]] +// +__mfp8 func1n(__mfp8 mfp8) { + __mfp8 f1n[10]; + f1n[2] = mfp8; + return f1n[2]; +} + + + //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: // CHECK: {{.*}} diff --git a/clang/test/Modules/no-external-type-id.cppm b/clang/test/Modules/no-external-type-id.cppm index b8b987403812f23..6385f3a8aa00b24 100644 --- a/clang/test/Modules/no-external-type-id.cppm +++ b/clang/test/Modules/no-external-type-id.cppm @@ -23,7 +23,7 @@ export module b; import a; export int b(); -// CHECK: +__mfp8 test_cast_from_float(unsigned in) { + return (__mfp8)in; // expected-error {{used type '__mfp8' (aka '__MFloat8_t') where arithmetic or pointer type is required}} +} + +unsigned test_cast_to_int(__mfp8 in) { + return (unsigned)in; // expected-error {{operand of type '__mfp8' (aka '__MFloat8_t') where arithmetic or pointer type is required}} +} diff --git a/clang/test/Sema/arm-mfp8.cpp b/clang/test/Sema/arm-mfp8.cpp index f270168faceb328..e882c382522c223 100644 --- a/clang/test/Sema/arm-mfp8.cpp +++ b/clang/test/Sema/arm-mfp8.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify=sve,neon -triple aarch64-arm-none-eabi \ +// RUN: %clang_cc1 -fsyntax-only -verify=sve,neon,scalar -triple aarch64-arm-none-eabi \ // RUN: -target-feature -fp8 -target-feature +sve -target-feature +neon %s // REQUIRES: aarch64-registered-target @@ -29,3 +29,35 @@ void test_vector(mfloat8x8_t a, mfloat8x16_t b, uint8x8_t c) { c * b; // neon-error {{cannot convert between vector and non-scalar values ('uint8x8_t' (vector of 8 'uint8_t' values) and 'mfloat8x16_t' (aka '__MFloat8x16_t'))}} c / b; // neon-error {{cannot convert between vector and non-scalar values ('uint8x8_t' (vector of 8 'uint8_t' values) and 'mfloat8x16_t' (aka '__MFloat8x16_t'))}} } +__mfp8 test_static_cast_from_char(char in) { + return static_cast<__mfp8>(in); // scalar-error {{static_cast from 'char' 
to '__mfp8' (aka '__MFloat8_t') is not allowed}} +} + +char test_static_cast_to_char(__mfp8 in) { + return static_cast(in); // scalar-error {{static_cast from '__mfp8' (aka '__MFloat8_t') to 'char' is not allowed}} +} +void test(bool b) { + __mfp8 mfp8; + + mfp8 + mfp8; // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and '__mfp8')}} + mfp8 - mfp8; // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and '__mfp8')}} + mfp8 * mfp8; // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and '__mfp8')}} + mfp8 / mfp8; // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and '__mfp8')}} + ++mfp8; // scalar-error {{cannot increment value of type '__mfp8' (aka '__MFloat8_t')}} + --mfp8; // scalar-error {{cannot decrement value of type '__mfp8' (aka '__MFloat8_t')}} + + char u8; + + mfp8 + u8; // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and 'char')}} + u8 + mfp8; // scalar-error {{invalid operands to binary expression ('char' and '__mfp8' (aka '__MFloat8_t'))}} + mfp8 - u8; // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and 'char')}} + u8 - mfp8; // scalar-error {{invalid operands to binary expression ('char' and '__mfp8' (aka '__MFloat8_t'))}} + mfp8 * u8; // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and 'char')}} + u8 * mfp8; // scalar-error {{invalid operands to binary expression ('char' and '__mfp8' (aka '__MFloat8_t'))}} + mfp8 / u8; // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and 'char')}} + u8 / mfp8; // scalar-error {{invalid operands to binary expression ('char' and '__mfp8' (aka '__MFloat8_t'))}} + mfp8 = u8; // scalar-error {{assigning to '__mfp8' (aka '__MFloat8_t') from incompatible type 'char'}} + u8 = mfp8; // scalar-error {{assigning to 'char' from incompatible 
type '__mfp8' (aka '__MFloat8_t')}} + mfp8 + (b ? u8 : mfp8); // scalar-error {{incompatible operand types ('char' and '__mfp8' (aka '__MFloat8_t'))}} +} + diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 35c8fcf69910b6a..8caba774649a2b4 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2588,6 +2588,7 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) { OS << "typedef __fp16 float16_t;\n"; OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n"; + OS << "typedef __MFloat8_t __mfp8;\n"; OS << "typedef __MFloat8x8_t mfloat8x8_t;\n"; OS << "typedef __MFloat8x16_t mfloat8x16_t;\n"; OS << "typedef double float64_t;\n"; From a69d2a18d207947a25838dd01d2116bee384b75b Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 25 Oct 2024 09:05:27 -0400 Subject: [PATCH 015/425] [gn] try to port b1be21394e9c more Bots are currently failing check-hwasan with llvm-lit: .../llvm/utils/lit/lit/llvm/subst.py:133: fatal: Did not find hwasan_symbolize in /.../out/gn/lib/clang/20/lib/linux Maybe this fixes that. See also llvm/utils/gn/secondary/compiler-rt/lib/hwasan/scripts/BUILD.gn. 
--- llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn index 7194c47b1becc48..520f3b6c01665fa 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn @@ -55,7 +55,7 @@ write_cmake_config("lit_common_configured") { "COMPILER_RT_ENABLE_INTERNAL_SYMBOLIZER_PYBOOL=False", "COMPILER_RT_HAS_NO_DEFAULT_CONFIG_FLAG_PYBOOL=True", "COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL=False", - "COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR=" + rebase_path(crt_current_out_dir), + "COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR=" + rebase_path("$root_out_dir/bin"), "COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR=" + rebase_path(crt_current_out_dir), "COMPILER_RT_RESOLVED_OUTPUT_DIR=" + rebase_path(crt_current_out_dir), From 4a6b56960f445d111adc9aef799acad8c6ca41f0 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 25 Oct 2024 09:19:21 -0400 Subject: [PATCH 016/425] [gn] try to port b1be21394e9c even more Bots are now failing check-hwasan with llvm-lit: .../llvm/utils/lit/lit/llvm/subst.py:133: fatal: Did not find hwasan_symbolize in .../out/gn/stage2_unix/bin This time it's the right fix for sure! 
--- llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn index 520f3b6c01665fa..020f3e7d9acd7bb 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn @@ -55,7 +55,8 @@ write_cmake_config("lit_common_configured") { "COMPILER_RT_ENABLE_INTERNAL_SYMBOLIZER_PYBOOL=False", "COMPILER_RT_HAS_NO_DEFAULT_CONFIG_FLAG_PYBOOL=True", "COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL=False", - "COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR=" + rebase_path("$root_out_dir/bin"), + "COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR=" + + rebase_path("$root_build_dir/bin"), "COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR=" + rebase_path(crt_current_out_dir), "COMPILER_RT_RESOLVED_OUTPUT_DIR=" + rebase_path(crt_current_out_dir), From 83e7e6a0ff9363da89d8917937407f9d37caec83 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 25 Oct 2024 13:20:14 +0000 Subject: [PATCH 017/425] [gn build] Port 2e43a304f10f --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 0a2552f4b0616e4..1630c8004d31575 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -954,6 +954,14 @@ if (current_toolchain == default_toolchain) { "__utility/to_underlying.h", "__utility/unreachable.h", "__variant/monostate.h", + "__vector/comparison.h", + "__vector/container_traits.h", + "__vector/erase.h", + "__vector/pmr.h", + "__vector/swap.h", + "__vector/vector.h", + "__vector/vector_bool.h", + "__vector/vector_bool_formatter.h", "__verbose_abort", "algorithm", "any", From a3dd6000a732d0b9a7019abd0dba5613786b9ff4 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 25 Oct 2024 13:20:15 +0000 
Subject: [PATCH 018/425] [gn build] Port c4248fa3edd3 --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 1bbec962ff68d5d..0a97bcf59112b42 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -245,6 +245,8 @@ copy("Headers") { "mmintrin.h", "module.modulemap", "movdirintrin.h", + "movrs_avx10_2_512intrin.h", + "movrs_avx10_2intrin.h", "msa.h", "mwaitxintrin.h", "nmmintrin.h", From b3703fa50485cf90b04105e6a223ccdd1e29c9af Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Fri, 25 Oct 2024 13:26:58 +0000 Subject: [PATCH 019/425] [AArch64]Update test aarch64-debug-types.c This patch fix the failing tests by adding REQUIRES: aarch64-registered-target This tests was failing in non aarch64 cpu. The test was introduced by: [CLANG][AArch64] Add the modal 8 bit floating-point scalar type (#97277) --- clang/test/CodeGen/aarch64-debug-types.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/test/CodeGen/aarch64-debug-types.c b/clang/test/CodeGen/aarch64-debug-types.c index c109610023ed49e..f1ab74c5c31bdb1 100644 --- a/clang/test/CodeGen/aarch64-debug-types.c +++ b/clang/test/CodeGen/aarch64-debug-types.c @@ -1,5 +1,8 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 \ // RUN: -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s + +// REQUIRES: aarch64-registered-target + #include void test_locals(void) { From 9043bdbce4ab1c6f07e72ddfedf7165bdf2b3e40 Mon Sep 17 00:00:00 2001 From: Boaz Brickner Date: Fri, 25 Oct 2024 15:31:57 +0200 Subject: [PATCH 020/425] [clang] Output an error when [[lifetimebound]] attribute is applied on a function parameter while the function returns void (#113460) Fixes: https://github.com/llvm/llvm-project/issues/107556 --- clang/docs/ReleaseNotes.rst 
| 9 +++++++++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 +++ clang/lib/Sema/SemaDecl.cpp | 14 +++++++++++++- clang/test/SemaCXX/attr-lifetimebound.cpp | 12 ++++++++++-- 4 files changed, 35 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9e1558d8acc99f4..170c4cc280537f9 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -133,6 +133,15 @@ C++ Specific Potentially Breaking Changes // Fixed version: unsigned operator""_udl_name(unsigned long long); +- Clang will now produce an error diagnostic when [[clang::lifetimebound]] is + applied on a parameter of a function that returns void. This was previously + ignored and had no effect. (#GH107556) + + .. code-block:: c++ + + // Now diagnoses with an error. + void f(int& i [[clang::lifetimebound]]); + ABI Changes in This Version --------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 8e4718008ece726..9b9bdd7c800e37a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10097,6 +10097,9 @@ def err_lifetimebound_no_object_param : Error< def err_lifetimebound_ctor_dtor : Error< "'lifetimebound' attribute cannot be applied to a " "%select{constructor|destructor}0">; +def err_lifetimebound_void_return_type : Error< + "'lifetimebound' attribute cannot be applied to a parameter of a function " + "that returns void">; // CHECK: returning address/reference of stack memory def warn_ret_stack_addr_ref : Warning< diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 25061f02c13f6ca..f8e5f3c6d309d67 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -6940,7 +6940,7 @@ static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) { } } - // Check the attributes on the function type, if any. 
+ // Check the attributes on the function type and function params, if any. if (const auto *FD = dyn_cast(&ND)) { // Don't declare this variable in the second operand of the for-statement; // GCC miscompiles that by ending its lifetime before evaluating the @@ -6970,6 +6970,18 @@ static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) { } } } + + for (unsigned int I = 0; I < FD->getNumParams(); ++I) { + const ParmVarDecl *P = FD->getParamDecl(I); + + // The [[lifetimebound]] attribute can be applied to a function parameter + // only if the function returns a value. + if (auto *A = P->getAttr()) { + if (!isa(FD) && FD->getReturnType()->isVoidType()) { + S.Diag(A->getLocation(), diag::err_lifetimebound_void_return_type); + } + } + } } } diff --git a/clang/test/SemaCXX/attr-lifetimebound.cpp b/clang/test/SemaCXX/attr-lifetimebound.cpp index 1c5c79777c71c8d..804d61fb62ca402 100644 --- a/clang/test/SemaCXX/attr-lifetimebound.cpp +++ b/clang/test/SemaCXX/attr-lifetimebound.cpp @@ -1,8 +1,7 @@ // RUN: %clang_cc1 -std=c++23 -verify %s namespace usage_invalid { - // FIXME: Should we diagnose a void return type? - void voidreturn(int ¶m [[clang::lifetimebound]]); + void void_return(int ¶m [[clang::lifetimebound]]); // expected-error {{'lifetimebound' attribute cannot be applied to a parameter of a function that returns void}} int *not_class_member() [[clang::lifetimebound]]; // expected-error {{non-member function has no implicit object parameter}} struct A { @@ -12,6 +11,8 @@ namespace usage_invalid { int *explicit_object(this A&) [[clang::lifetimebound]]; // expected-error {{explicit object member function has no implicit object parameter}} int not_function [[clang::lifetimebound]]; // expected-error {{only applies to parameters and implicit object parameters}} int [[clang::lifetimebound]] also_not_function; // expected-error {{cannot be applied to types}} + // FIXME: Should diagnose a void return type. 
+ void void_return_member() [[clang::lifetimebound]]; }; int *attr_with_param(int ¶m [[clang::lifetimebound(42)]]); // expected-error {{takes no arguments}} } @@ -31,6 +32,13 @@ namespace usage_ok { return *(int*)param; } + template R dependent_void(const T& t [[clang::lifetimebound]]); + void dependent_void_instantiation() { + dependent_void(1); // OK: Returns void. + int x = dependent_void(1); // expected-warning {{temporary whose address is used as value of local variable 'x' will be destroyed at the end of the full-expression}} + dependent_void(1); // OK: Returns an unused value. + } + struct A { A(); A(int); From 21ecd4a9dff312b26f7a276dc78d1ddb3707e7c1 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Fri, 25 Oct 2024 17:03:15 +0300 Subject: [PATCH 021/425] [AArch64][PAC] Factor out the emission of pointer check sequence (NFC) (#110702) When pointer is authenticated or resigned, it may be required to explicitly check the authenticated value to prevent introducing signing or authentication oracles. While the check sequence is expensive in general, a more efficient sequence can be emitted under specific assumptions. This commit factors out the emission of the code sequence to check the authenticated pointer value in preparation for adding other variants of checking code, as it is currently done when emitting tail calls. --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 168 ++++++++++-------- 1 file changed, 91 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index a982ea67a0f2792..6d2dd0ecbccf317 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -150,6 +150,12 @@ class AArch64AsmPrinter : public AsmPrinter { // Emit the sequence for BRA/BLRA (authenticate + branch/call). 
void emitPtrauthBranch(const MachineInstr *MI); + void emitPtrauthCheckAuthenticatedValue(Register TestedReg, + Register ScratchReg, + AArch64PACKey::ID Key, + bool ShouldTrap, + const MCSymbol *OnFailure); + // Emit the sequence for AUT or AUTPAC. void emitPtrauthAuthResign(const MachineInstr *MI); @@ -1719,45 +1725,37 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, return AArch64::X17; } -void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { - const bool IsAUTPAC = MI->getOpcode() == AArch64::AUTPAC; - - // We can expand AUT/AUTPAC into 3 possible sequences: - // - unchecked: - // autia x16, x0 - // pacib x16, x1 ; if AUTPAC +/// Emits a code sequence to check an authenticated pointer value. +/// +/// If OnFailure argument is passed, jump there on check failure instead +/// of proceeding to the next instruction (only if ShouldTrap is false). +void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue( + Register TestedReg, Register ScratchReg, AArch64PACKey::ID Key, + bool ShouldTrap, const MCSymbol *OnFailure) { + // Insert a sequence to check if authentication of TestedReg succeeded, + // such as: // // - checked and clearing: - // mov x17, x0 - // movk x17, #disc, lsl #48 - // autia x16, x17 + // ; x16 is TestedReg, x17 is ScratchReg // mov x17, x16 // xpaci x17 // cmp x16, x17 // b.eq Lsuccess // mov x16, x17 // b Lend - // Lsuccess: - // mov x17, x1 - // movk x17, #disc, lsl #48 - // pacib x16, x17 - // Lend: - // Where we only emit the AUT if we started with an AUT. + // Lsuccess: + // ; skipped if authentication failed + // Lend: + // ... // // - checked and trapping: - // mov x17, x0 - // movk x17, #disc, lsl #48 - // autia x16, x0 // mov x17, x16 // xpaci x17 // cmp x16, x17 // b.eq Lsuccess // brk #<0xc470 + aut key> - // Lsuccess: - // mov x17, x1 - // movk x17, #disc, lsl #48 - // pacib x16, x17 ; if AUTPAC - // Where the b.eq skips over the trap if the PAC is valid. + // Lsuccess: + // ... 
// // This sequence is expensive, but we need more information to be able to // do better. @@ -1770,6 +1768,71 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { // Either way, we also don't always know whether TBI is enabled or not for // the specific target environment. + unsigned XPACOpc = getXPACOpcodeForKey(Key); + + MCSymbol *SuccessSym = createTempSymbol("auth_success_"); + + // mov Xscratch, Xtested + emitMovXReg(ScratchReg, TestedReg); + + // xpac(i|d) Xscratch + EmitToStreamer(MCInstBuilder(XPACOpc).addReg(ScratchReg).addReg(ScratchReg)); + + // cmp Xtested, Xscratch + EmitToStreamer(MCInstBuilder(AArch64::SUBSXrs) + .addReg(AArch64::XZR) + .addReg(TestedReg) + .addReg(ScratchReg) + .addImm(0)); + + // b.eq Lsuccess + EmitToStreamer(MCInstBuilder(AArch64::Bcc) + .addImm(AArch64CC::EQ) + .addExpr(MCSymbolRefExpr::create(SuccessSym, OutContext))); + + if (ShouldTrap) { + assert(!OnFailure && "Cannot specify OnFailure with ShouldTrap"); + // Trapping sequences do a 'brk'. + // brk #<0xc470 + aut key> + EmitToStreamer(MCInstBuilder(AArch64::BRK).addImm(0xc470 | Key)); + } else { + // Non-trapping checked sequences return the stripped result in TestedReg, + // skipping over success-only code (such as re-signing the pointer) if + // there is one. + // Note that this can introduce an authentication oracle (such as based on + // the high bits of the re-signed value). + + // FIXME: Can we simply return the AUT result, already in TestedReg? + // mov Xtested, Xscratch + emitMovXReg(TestedReg, ScratchReg); + + if (OnFailure) { + // b Lend + EmitToStreamer( + MCInstBuilder(AArch64::B) + .addExpr(MCSymbolRefExpr::create(OnFailure, OutContext))); + } + } + + // If the auth check succeeds, we can continue. 
+ // Lsuccess: + OutStreamer->emitLabel(SuccessSym); +} + +void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { + const bool IsAUTPAC = MI->getOpcode() == AArch64::AUTPAC; + + // We expand AUT/AUTPAC into a sequence of the form + // + // ; authenticate x16 + // ; check pointer in x16 + // Lsuccess: + // ; sign x16 (if AUTPAC) + // Lend: ; if not trapping on failure + // + // with the checking sequence chosen depending on whether we should check + // the pointer and whether we should trap on failure. + // By default, auth/resign sequences check for auth failures. bool ShouldCheck = true; // In the checked sequence, we only trap if explicitly requested. @@ -1800,8 +1863,6 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { uint64_t AUTDisc = MI->getOperand(1).getImm(); unsigned AUTAddrDisc = MI->getOperand(2).getReg(); - unsigned XPACOpc = getXPACOpcodeForKey(AUTKey); - // Compute aut discriminator into x17 assert(isUInt<16>(AUTDisc)); unsigned AUTDiscReg = emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc); @@ -1824,59 +1885,12 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { MCSymbol *EndSym = nullptr; - // Checked sequences do an additional strip-and-compare. if (ShouldCheck) { - MCSymbol *SuccessSym = createTempSymbol("auth_success_"); - - // XPAC has tied src/dst: use x17 as a temporary copy. - // mov x17, x16 - emitMovXReg(AArch64::X17, AArch64::X16); - - // xpaci x17 - EmitToStreamer( - *OutStreamer, - MCInstBuilder(XPACOpc).addReg(AArch64::X17).addReg(AArch64::X17)); - - // cmp x16, x17 - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXrs) - .addReg(AArch64::XZR) - .addReg(AArch64::X16) - .addReg(AArch64::X17) - .addImm(0)); - - // b.eq Lsuccess - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::Bcc) - .addImm(AArch64CC::EQ) - .addExpr(MCSymbolRefExpr::create( - SuccessSym, OutContext))); - - if (ShouldTrap) { - // Trapping sequences do a 'brk'. 
- // brk #<0xc470 + aut key> - EmitToStreamer(*OutStreamer, - MCInstBuilder(AArch64::BRK).addImm(0xc470 | AUTKey)); - } else { - // Non-trapping checked sequences return the stripped result in x16, - // skipping over the PAC if there is one. - - // FIXME: can we simply return the AUT result, already in x16? without.. - // ..traps this is usable as an oracle anyway, based on high bits - // mov x17, x16 - emitMovXReg(AArch64::X16, AArch64::X17); - - if (IsAUTPAC) { - EndSym = createTempSymbol("resign_end_"); - - // b Lend - EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::B) - .addExpr(MCSymbolRefExpr::create( - EndSym, OutContext))); - } - } + if (IsAUTPAC && !ShouldTrap) + EndSym = createTempSymbol("resign_end_"); - // If the auth check succeeds, we can continue. - // Lsuccess: - OutStreamer->emitLabel(SuccessSym); + emitPtrauthCheckAuthenticatedValue(AArch64::X16, AArch64::X17, AUTKey, + ShouldTrap, EndSym); } // We already emitted unchecked and checked-but-non-trapping AUTs. From 800a47d6cd33ea1c2a888ceb67d566366c61e7ed Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 25 Oct 2024 10:17:16 -0400 Subject: [PATCH 022/425] [libc++][NFC] Fix include guards inside locale_base_api --- libcxx/include/__locale_dir/locale_base_api/android.h | 6 +++--- .../__locale_dir/locale_base_api/bsd_locale_defaults.h | 6 +++--- .../__locale_dir/locale_base_api/bsd_locale_fallbacks.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/fuchsia.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/ibm.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/locale_guard.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/musl.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/newlib.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/openbsd.h | 6 +++--- libcxx/include/__locale_dir/locale_base_api/win32.h | 6 +++--- 10 files changed, 30 insertions(+), 30 deletions(-) diff --git a/libcxx/include/__locale_dir/locale_base_api/android.h 
b/libcxx/include/__locale_dir/locale_base_api/android.h index 9965d8bbf6a2ecc..08ef5407dedf4e0 100644 --- a/libcxx/include/__locale_dir/locale_base_api/android.h +++ b/libcxx/include/__locale_dir/locale_base_api/android.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H #include @@ -47,4 +47,4 @@ inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr # endif // __NDK_MAJOR__ <= 16 #endif // __has_include() -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h index 1f9607209842cad..e88eb4fa41d7af9 100644 --- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h +++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h @@ -11,8 +11,8 @@ // we will define the mapping from an internal macro to the real BSD symbol. //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -33,4 +33,4 @@ #define __libcpp_asprintf_l(...) asprintf_l(__VA_ARGS__) #define __libcpp_sscanf_l(...) 
sscanf_l(__VA_ARGS__) -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h index 76b94287cd6cc88..5f99c7aea02a96a 100644 --- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h +++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h @@ -10,8 +10,8 @@ // of those functions for non-BSD platforms. //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H #include <__locale_dir/locale_base_api/locale_guard.h> #include @@ -123,4 +123,4 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l( _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H diff --git a/libcxx/include/__locale_dir/locale_base_api/fuchsia.h b/libcxx/include/__locale_dir/locale_base_api/fuchsia.h index 4c3440f981c6d08..f6ef454ba7ada75 100644 --- a/libcxx/include/__locale_dir/locale_base_api/fuchsia.h +++ b/libcxx/include/__locale_dir/locale_base_api/fuchsia.h @@ -7,12 +7,12 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H #include <__support/xlocale/__posix_l_fallback.h> #include <__support/xlocale/__strtonum_fallback.h> #include #include -#endif // 
_LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H diff --git a/libcxx/include/__locale_dir/locale_base_api/ibm.h b/libcxx/include/__locale_dir/locale_base_api/ibm.h index fa3bc1c3633f5dc..1d1d15df9f7995e 100644 --- a/libcxx/include/__locale_dir/locale_base_api/ibm.h +++ b/libcxx/include/__locale_dir/locale_base_api/ibm.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H #if defined(__MVS__) # include <__support/ibm/locale_mgmt_zos.h> @@ -105,4 +105,4 @@ _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char return str_size; } -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H diff --git a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__locale_dir/locale_base_api/locale_guard.h index 2baacb51cd06555..7d15f2d253adc39 100644 --- a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h +++ b/libcxx/include/__locale_dir/locale_base_api/locale_guard.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H #include <__config> #include <__locale> // for locale_t @@ -75,4 +75,4 @@ struct __libcpp_locale_guard { _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H diff --git a/libcxx/include/__locale_dir/locale_base_api/musl.h b/libcxx/include/__locale_dir/locale_base_api/musl.h index 
bf7b849d5863421..1653214cdba1e39 100644 --- a/libcxx/include/__locale_dir/locale_base_api/musl.h +++ b/libcxx/include/__locale_dir/locale_base_api/musl.h @@ -14,8 +14,8 @@ // in Musl. //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H #include #include @@ -28,4 +28,4 @@ inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, c return ::strtoull(__nptr, __endptr, __base); } -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H diff --git a/libcxx/include/__locale_dir/locale_base_api/newlib.h b/libcxx/include/__locale_dir/locale_base_api/newlib.h index a8c1cff16e6d800..7da10e5889843dd 100644 --- a/libcxx/include/__locale_dir/locale_base_api/newlib.h +++ b/libcxx/include/__locale_dir/locale_base_api/newlib.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H diff --git a/libcxx/include/__locale_dir/locale_base_api/openbsd.h b/libcxx/include/__locale_dir/locale_base_api/openbsd.h index 0c05d6a0f788747..d4fb224e0c80a09 100644 --- a/libcxx/include/__locale_dir/locale_base_api/openbsd.h +++ b/libcxx/include/__locale_dir/locale_base_api/openbsd.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H 
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H #include <__support/xlocale/__strtonum_fallback.h> #include @@ -16,4 +16,4 @@ #include #include -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H diff --git a/libcxx/include/__locale_dir/locale_base_api/win32.h b/libcxx/include/__locale_dir/locale_base_api/win32.h index f66baffb6920456..f488a0dc0d69b3f 100644 --- a/libcxx/include/__locale_dir/locale_base_api/win32.h +++ b/libcxx/include/__locale_dir/locale_base_api/win32.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H #include <__config> #include @@ -232,4 +232,4 @@ _LIBCPP_EXPORTED_FROM_ABI int vasprintf_l(char** __ret, locale_t __loc, const ch // not-so-pressing FIXME: use locale to determine blank characters inline int iswblank_l(wint_t __c, locale_t /*loc*/) { return (__c == L' ' || __c == L'\t'); } -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H From 577c7dd7cc4c5a9f62f9654cfa30ee9d55709426 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 25 Oct 2024 15:20:24 +0100 Subject: [PATCH 023/425] [AArch64] Add a phase-ordering test for vectorizing predicated selects. 
NFC --- .../AArch64/predicated-reduction.ll | 294 ++++++++++++++++++ 1 file changed, 294 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll new file mode 100644 index 000000000000000..7274e952567693d --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes="default" -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64" + +define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) { +; CHECK-LABEL: define nofpclass(nan inf) double @monte_simple( +; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr nocapture noundef readonly [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[V1_011:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V1_1:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[V0_010:%.*]] = phi double [ 0.000000e+00, 
%[[FOR_BODY_PREHEADER]] ], [ [[V0_1:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double +; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]] +; CHECK-NEXT: [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]] +; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00 +; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[SUB]], [[V0_010]] +; CHECK-NEXT: [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]] +; CHECK-NEXT: [[ADD4:%.*]] = fadd fast double [[MUL3]], [[V1_011]] +; CHECK-NEXT: [[V0_1]] = select i1 [[CMP1]], double [[ADD]], double [[V0_010]] +; CHECK-NEXT: [[V1_1]] = select i1 [[CMP1]], double [[ADD4]], double [[V1_011]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]] +; CHECK-NEXT: br label %[[FOR_END]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[ADD5:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: ret double [[ADD5]] +; +entry: + %nblocks.addr = alloca i32, align 4 + %RAND_BLOCK_LENGTH.addr = alloca i32, align 4 + %samples.addr = alloca ptr, align 8 + %Y.addr = alloca double, align 8 + %Z.addr = alloca double, align 8 + %i = alloca i32, align 4 + %block = alloca i32, align 4 + %rngVal = alloca double, align 8 + %callValue = alloca double, align 8 + %v0 = alloca double, align 8 + %v1 = alloca double, align 8 + store i32 %nblocks, ptr %nblocks.addr, align 4 + store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4 + store ptr %samples, ptr %samples.addr, align 8 + store double %Y, ptr %Y.addr, align 
8 + store double %Z, ptr %Z.addr, align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2 + call void @llvm.lifetime.start.p0(i64 4, ptr %block) #2 + call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #2 + call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #2 + call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #2 + store double 0.000000e+00, ptr %v0, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr %v1) #2 + store double 0.000000e+00, ptr %v1, align 8 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4 + %1 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load ptr, ptr %samples.addr, align 8 + %3 = load i32, ptr %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom + %4 = load float, ptr %arrayidx, align 4 + %conv = fpext float %4 to double + store double %conv, ptr %rngVal, align 8 + %5 = load double, ptr %Y.addr, align 8 + %6 = load double, ptr %rngVal, align 8 + %mul = fmul fast double %5, %6 + %7 = load double, ptr %Z.addr, align 8 + %sub = fsub fast double %mul, %7 + store double %sub, ptr %callValue, align 8 + %8 = load double, ptr %callValue, align 8 + %cmp1 = fcmp fast ogt double %8, 0.000000e+00 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %9 = load double, ptr %callValue, align 8 + %10 = load double, ptr %v0, align 8 + %add = fadd fast double %10, %9 + store double %add, ptr %v0, align 8 + %11 = load double, ptr %callValue, align 8 + %12 = load double, ptr %callValue, align 8 + %mul3 = fmul fast double %11, %12 + %13 = load double, ptr %v1, align 8 + %add4 = fadd fast double %13, %mul3 + store double %add4, ptr %v1, align 8 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %14 = load i32, ptr %i, align 
4 + %inc = add nsw i32 %14, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %15 = load double, ptr %v0, align 8 + %16 = load double, ptr %v1, align 8 + %add5 = fadd fast double %15, %16 + call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #2 + call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #2 + call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #2 + call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #2 + call void @llvm.lifetime.end.p0(i64 4, ptr %block) #2 + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2 + ret double %add5 +} + +define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) { +; CHECK-LABEL: define nofpclass(nan inf) double @monte_exp( +; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr noundef [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[NBLOCKS]], 0 +; CHECK-NEXT: br i1 [[CMP16]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END10:.*]] +; CHECK: [[FOR_BODY_LR_PH]]: +; CHECK-NEXT: [[CMP211:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0 +; CHECK-NEXT: br i1 [[CMP211]], label %[[FOR_BODY_US_PREHEADER:.*]], label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY_US_PREHEADER]]: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64 +; CHECK-NEXT: br label %[[FOR_BODY_US:.*]] +; CHECK: [[FOR_BODY_US]]: +; CHECK-NEXT: [[V1_019_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US:.*]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ] +; CHECK-NEXT: [[V0_018_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ] +; CHECK-NEXT: [[BLOCK_017_US:%.*]] = phi i32 [ [[INC9_US:%.*]], 
%[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0, %[[FOR_BODY_US_PREHEADER]] ] +; CHECK-NEXT: tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]]) +; CHECK-NEXT: br label %[[FOR_BODY3_US:.*]] +; CHECK: [[FOR_BODY3_US]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_US]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY3_US]] ] +; CHECK-NEXT: [[V1_114_US:%.*]] = phi double [ [[V1_019_US]], %[[FOR_BODY_US]] ], [ [[V1_2_US]], %[[FOR_BODY3_US]] ] +; CHECK-NEXT: [[V0_113_US:%.*]] = phi double [ [[V0_018_US]], %[[FOR_BODY_US]] ], [ [[V0_2_US]], %[[FOR_BODY3_US]] ] +; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_US]], align 4 +; CHECK-NEXT: [[CONV_US:%.*]] = fpext float [[TMP0]] to double +; CHECK-NEXT: [[TMP1:%.*]] = tail call fast double @llvm.exp2.f64(double [[CONV_US]]) +; CHECK-NEXT: [[MUL_US:%.*]] = fmul fast double [[TMP1]], [[Y]] +; CHECK-NEXT: [[SUB_US:%.*]] = fsub fast double [[MUL_US]], [[Z]] +; CHECK-NEXT: [[CMP4_US:%.*]] = fcmp fast ogt double [[SUB_US]], 0.000000e+00 +; CHECK-NEXT: [[ADD_US:%.*]] = fadd fast double [[SUB_US]], [[V0_113_US]] +; CHECK-NEXT: [[MUL6_US:%.*]] = fmul fast double [[SUB_US]], [[SUB_US]] +; CHECK-NEXT: [[ADD7_US:%.*]] = fadd fast double [[MUL6_US]], [[V1_114_US]] +; CHECK-NEXT: [[V0_2_US]] = select i1 [[CMP4_US]], double [[ADD_US]], double [[V0_113_US]] +; CHECK-NEXT: [[V1_2_US]] = select i1 [[CMP4_US]], double [[ADD7_US]], double [[V1_114_US]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND25_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND25_NOT]], label %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]], label %[[FOR_BODY3_US]] +; CHECK: [[FOR_COND1_FOR_INC8_CRIT_EDGE_US]]: +; CHECK-NEXT: [[INC9_US]] = add nuw nsw i32 [[BLOCK_017_US]], 1 +; CHECK-NEXT: [[EXITCOND26_NOT:%.*]] = icmp eq i32 [[INC9_US]], 
[[NBLOCKS]] +; CHECK-NEXT: br i1 [[EXITCOND26_NOT]], label %[[FOR_END10]], label %[[FOR_BODY_US]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[BLOCK_017:%.*]] = phi i32 [ [[INC9:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]]) +; CHECK-NEXT: [[INC9]] = add nuw nsw i32 [[BLOCK_017]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC9]], [[NBLOCKS]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END10]], label %[[FOR_BODY]] +; CHECK: [[FOR_END10]]: +; CHECK-NEXT: [[V0_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V0_2_US]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ] +; CHECK-NEXT: [[V1_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V1_2_US]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ] +; CHECK-NEXT: [[ADD11:%.*]] = fadd fast double [[V1_0_LCSSA]], [[V0_0_LCSSA]] +; CHECK-NEXT: ret double [[ADD11]] +; +entry: + %nblocks.addr = alloca i32, align 4 + %RAND_BLOCK_LENGTH.addr = alloca i32, align 4 + %samples.addr = alloca ptr, align 8 + %Y.addr = alloca double, align 8 + %Z.addr = alloca double, align 8 + %i = alloca i32, align 4 + %block = alloca i32, align 4 + %rngVal = alloca double, align 8 + %callValue = alloca double, align 8 + %v0 = alloca double, align 8 + %v1 = alloca double, align 8 + store i32 %nblocks, ptr %nblocks.addr, align 4 + store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4 + store ptr %samples, ptr %samples.addr, align 8 + store double %Y, ptr %Y.addr, align 8 + store double %Z, ptr %Z.addr, align 8 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #4 + call void @llvm.lifetime.start.p0(i64 4, ptr %block) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #4 + call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #4 + store double 0.000000e+00, ptr %v0, align 8 + call void 
@llvm.lifetime.start.p0(i64 8, ptr %v1) #4 + store double 0.000000e+00, ptr %v1, align 8 + store i32 0, ptr %block, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc8, %entry + %0 = load i32, ptr %block, align 4 + %1 = load i32, ptr %nblocks.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end10 + +for.body: ; preds = %for.cond + %2 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4 + %3 = load ptr, ptr %samples.addr, align 8 + call void @resample(i32 noundef %2, ptr noundef %3) + store i32 0, ptr %i, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %4 = load i32, ptr %i, align 4 + %5 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4 + %cmp2 = icmp slt i32 %4, %5 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %6 = load ptr, ptr %samples.addr, align 8 + %7 = load i32, ptr %i, align 4 + %idxprom = sext i32 %7 to i64 + %arrayidx = getelementptr inbounds float, ptr %6, i64 %idxprom + %8 = load float, ptr %arrayidx, align 4 + %conv = fpext float %8 to double + store double %conv, ptr %rngVal, align 8 + %9 = load double, ptr %Y.addr, align 8 + %10 = load double, ptr %rngVal, align 8 + %11 = call fast double @llvm.exp2.f64(double %10) + %mul = fmul fast double %9, %11 + %12 = load double, ptr %Z.addr, align 8 + %sub = fsub fast double %mul, %12 + store double %sub, ptr %callValue, align 8 + %13 = load double, ptr %callValue, align 8 + %cmp4 = fcmp fast ogt double %13, 0.000000e+00 + br i1 %cmp4, label %if.then, label %if.end + +if.then: ; preds = %for.body3 + %14 = load double, ptr %callValue, align 8 + %15 = load double, ptr %v0, align 8 + %add = fadd fast double %15, %14 + store double %add, ptr %v0, align 8 + %16 = load double, ptr %callValue, align 8 + %17 = load double, ptr %callValue, align 8 + %mul6 = fmul fast double %16, %17 + %18 = load double, ptr %v1, align 8 + %add7 = fadd fast double %18, %mul6 + store double %add7, ptr %v1, align 8 + br label %if.end 
+ +if.end: ; preds = %if.then, %for.body3 + br label %for.inc + +for.inc: ; preds = %if.end + %19 = load i32, ptr %i, align 4 + %inc = add nsw i32 %19, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc8 + +for.inc8: ; preds = %for.end + %20 = load i32, ptr %block, align 4 + %inc9 = add nsw i32 %20, 1 + store i32 %inc9, ptr %block, align 4 + br label %for.cond + +for.end10: ; preds = %for.cond + %21 = load double, ptr %v0, align 8 + %22 = load double, ptr %v1, align 8 + %add11 = fadd fast double %21, %22 + call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #4 + call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #4 + call void @llvm.lifetime.end.p0(i64 4, ptr %block) #4 + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #4 + ret double %add11 +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @resample(i32 noundef, ptr noundef) +declare double @llvm.exp2.f64(double) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) From 9f6c632ecda08bfff76b798c46d5d7cfde57b5e9 Mon Sep 17 00:00:00 2001 From: Andrea Faulds Date: Fri, 25 Oct 2024 16:21:59 +0200 Subject: [PATCH 024/425] [mlir][mlir-spirv-cpu-runner] Move MLIR pass pipeline to mlir-opt (#113594) Adds a new mlir-opt test-only pass, -test-spirv-cpu-runner-pipeline, which runs the set of MLIR passes needed for the mlir-spirv-cpu-runner, and removes them from the runner. The tests are changed to invoke mlir-opt with this flag before running the runner. The eventual goal is to move all host/device code generation steps out of the runner, like with some of the other runners. Recommit of 17e9752267ed9c81c8da87f3a6d0e01f130b0d04. It was reverted due to a build failure, but the build failure had in fact already been fixed in e7302319b52e3d231216d54d10622b0698928a96. 
--- mlir/test/lib/Pass/CMakeLists.txt | 1 + .../lib/Pass/TestSPIRVCPURunnerPipeline.cpp | 47 +++++++++++++++++++ mlir/test/mlir-spirv-cpu-runner/double.mlir | 3 +- .../mlir-spirv-cpu-runner/simple_add.mlir | 3 +- mlir/tools/mlir-opt/mlir-opt.cpp | 2 + .../mlir-spirv-cpu-runner.cpp | 24 ---------- 6 files changed, 54 insertions(+), 26 deletions(-) create mode 100644 mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp diff --git a/mlir/test/lib/Pass/CMakeLists.txt b/mlir/test/lib/Pass/CMakeLists.txt index b190f054e50bd1c..f489b7e51e5038a 100644 --- a/mlir/test/lib/Pass/CMakeLists.txt +++ b/mlir/test/lib/Pass/CMakeLists.txt @@ -3,6 +3,7 @@ get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS) add_mlir_library(MLIRTestPass TestDynamicPipeline.cpp TestPassManager.cpp + TestSPIRVCPURunnerPipeline.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp b/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp new file mode 100644 index 000000000000000..ded0d22c31307e9 --- /dev/null +++ b/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp @@ -0,0 +1,47 @@ +//===------------------ TestSPIRVCPURunnerPipeline.cpp --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements a pipeline for use by mlir-spirv-cpu-runner tests. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h" +#include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h" +#include "mlir/Dialect/GPU/Transforms/Passes.h" +#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" +#include "mlir/Dialect/SPIRV/Transforms/Passes.h" +#include "mlir/Pass/PassManager.h" + +using namespace mlir; + +namespace { + +void buildTestSPIRVCPURunnerPipeline(OpPassManager &passManager) { + passManager.addPass(createGpuKernelOutliningPass()); + passManager.addPass(createConvertGPUToSPIRVPass(/*mapMemorySpace=*/true)); + + OpPassManager &nestedPM = passManager.nest(); + nestedPM.addPass(spirv::createSPIRVLowerABIAttributesPass()); + nestedPM.addPass(spirv::createSPIRVUpdateVCEPass()); + passManager.addPass(createLowerHostCodeToLLVMPass()); + passManager.addPass(createConvertSPIRVToLLVMPass()); +} + +} // namespace + +namespace mlir { +namespace test { +void registerTestSPIRVCPURunnerPipeline() { + PassPipelineRegistration<>( + "test-spirv-cpu-runner-pipeline", + "Runs a series of passes for lowering SPIR-V-dialect MLIR to " + "LLVM-dialect MLIR intended for mlir-spirv-cpu-runner.", + buildTestSPIRVCPURunnerPipeline); +} +} // namespace test +} // namespace mlir diff --git a/mlir/test/mlir-spirv-cpu-runner/double.mlir b/mlir/test/mlir-spirv-cpu-runner/double.mlir index cd551ffb1bd0623..35557ba1e94c003 100644 --- a/mlir/test/mlir-spirv-cpu-runner/double.mlir +++ b/mlir/test/mlir-spirv-cpu-runner/double.mlir @@ -1,4 +1,5 @@ -// RUN: mlir-spirv-cpu-runner %s -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \ +// RUN: mlir-opt %s -test-spirv-cpu-runner-pipeline \ +// RUN: | mlir-spirv-cpu-runner - -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \ // RUN: | FileCheck %s // CHECK: [8, 8, 8, 8, 8, 8] diff --git a/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir 
b/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir index 119e973e45e4a7b..75675a69a675833 100644 --- a/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir +++ b/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir @@ -1,4 +1,5 @@ -// RUN: mlir-spirv-cpu-runner %s -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \ +// RUN: mlir-opt %s -test-spirv-cpu-runner-pipeline \ +// RUN: | mlir-spirv-cpu-runner - -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \ // RUN: | FileCheck %s // CHECK: data = diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 36b142484bb04a6..002c3900056dee1 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -142,6 +142,7 @@ void registerTestSCFWhileOpBuilderPass(); void registerTestSCFWrapInZeroTripCheckPasses(); void registerTestShapeMappingPass(); void registerTestSliceAnalysisPass(); +void registerTestSPIRVCPURunnerPipeline(); void registerTestSPIRVFuncSignatureConversion(); void registerTestSPIRVVectorUnrolling(); void registerTestTensorCopyInsertionPass(); @@ -278,6 +279,7 @@ void registerTestPasses() { mlir::test::registerTestSCFWrapInZeroTripCheckPasses(); mlir::test::registerTestShapeMappingPass(); mlir::test::registerTestSliceAnalysisPass(); + mlir::test::registerTestSPIRVCPURunnerPipeline(); mlir::test::registerTestSPIRVFuncSignatureConversion(); mlir::test::registerTestSPIRVVectorUnrolling(); mlir::test::registerTestTensorCopyInsertionPass(); diff --git a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp index 7e0b51cac806213..22ad1024db4a0b6 100644 --- a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp +++ b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp @@ -12,18 +12,12 @@ // //===----------------------------------------------------------------------===// -#include 
"mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h" -#include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h" -#include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" -#include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h" -#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h" -#include "mlir/Dialect/SPIRV/Transforms/Passes.h" #include "mlir/ExecutionEngine/JitRunner.h" #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/Pass/Pass.h" @@ -75,23 +69,6 @@ convertMLIRModule(Operation *op, llvm::LLVMContext &context) { return mainModule; } -static LogicalResult runMLIRPasses(Operation *module, - JitRunnerOptions &options) { - PassManager passManager(module->getContext(), - module->getName().getStringRef()); - if (failed(applyPassManagerCLOptions(passManager))) - return failure(); - passManager.addPass(createGpuKernelOutliningPass()); - passManager.addPass(createConvertGPUToSPIRVPass(/*mapMemorySpace=*/true)); - - OpPassManager &nestedPM = passManager.nest(); - nestedPM.addPass(spirv::createSPIRVLowerABIAttributesPass()); - nestedPM.addPass(spirv::createSPIRVUpdateVCEPass()); - passManager.addPass(createLowerHostCodeToLLVMPass()); - passManager.addPass(createConvertSPIRVToLLVMPass()); - return passManager.run(module); -} - int main(int argc, char **argv) { llvm::InitLLVM y(argc, argv); @@ -99,7 +76,6 @@ int main(int argc, char **argv) { llvm::InitializeNativeTargetAsmPrinter(); mlir::JitRunnerConfig jitRunnerConfig; - jitRunnerConfig.mlirTransformer = runMLIRPasses; jitRunnerConfig.llvmModuleBuilder = convertMLIRModule; mlir::DialectRegistry registry; From cbdfb18794026b0d662d7de1fa39c02ad6227abb Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Fri, 25 Oct 2024 15:39:07 +0100 Subject: [PATCH 025/425] [RISCV] Add Supm 
extension to RVA23 profiles (#113619) This is mandatory for both RVA23U64 and RVA23S64 in the ratified version of the specification . --- llvm/lib/Target/RISCV/RISCVProfiles.td | 3 ++- llvm/test/CodeGen/RISCV/attributes.ll | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVProfiles.td b/llvm/lib/Target/RISCV/RISCVProfiles.td index 157e087a64da07b..ce7d1973989fc13 100644 --- a/llvm/lib/Target/RISCV/RISCVProfiles.td +++ b/llvm/lib/Target/RISCV/RISCVProfiles.td @@ -73,7 +73,8 @@ defvar RVA23U64Features = !listconcat(RVA22U64Features, FeatureStdExtZcmop, FeatureStdExtZcb, FeatureStdExtZfa, - FeatureStdExtZawrs]); + FeatureStdExtZawrs, + FeatureStdExtSupm]); defvar RVA23S64BaseFeatures = !listconcat(RVA22S64BaseFeatures, [FeatureStdExtSvnapot, diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index e9743d484f776f0..9be9ddd05ee2900 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -578,8 +578,8 @@ ; RVA20S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zmmul1p0_za128rs1p0_ssccptr1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0" ; RVA22U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" ; RVA22S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscounterenw1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0_svinval1p0_svpbmt1p0" -; RVA23U64: .attribute 5, 
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0" -; RVA23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" +; RVA23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_supm1p0" +; RVA23S64: .attribute 5, 
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" ; RVB23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" ; RVB23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" ; RVM23U32: .attribute 5, "rv32i2p1_m2p0_zicbop1p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zimop1p0_zmmul1p0_zca1p0_zcb1p0_zce1p0_zcmop1p0_zcmp1p0_zcmt1p0_zba1p0_zbb1p0_zbs1p0" From bbc0e631d2d3facd5952aeafc7400761813acc3a Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Fri, 25 Oct 2024 15:41:39 +0100 Subject: [PATCH 026/425] [MLIR] Remove unneeded LLVMDialect.h include in ControlFlowToSCF.cpp (#113560) This fixes the following failure when doing a clean build (in particular no .ninja* lying around) of lib/libMLIRControlFlowToSCF.a only: ``` In file 
included from llvm/include/llvm/IR/Module.h:22, from mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h:37, from mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp:19 llvm/include/llvm/IR/Attributes.h:90:14: fatal error: llvm/IR/Attributes.inc: No such file or directory ``` --- mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp b/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp index d3ee89743da9db5..1c592d665f3e4c5 100644 --- a/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp +++ b/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp @@ -16,7 +16,6 @@ #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/UB/IR/UBOps.h" #include "mlir/Pass/Pass.h" From e47bf3d08d51306f2e534951a1b77043dc540ceb Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Wed, 23 Oct 2024 16:13:39 -0400 Subject: [PATCH 027/425] [JIT] Fix crash in unit tests The unit tests `ReOptimizeLayerTest.BasicReOptimization` and `JITLinkRedirectionManagerTest.BasicRedirectionOperation` are failing for me with the error: ``` Program aborted due to an unhandled Error: Error value was Success. (Note: Success values must still be checked prior to being destroyed). ``` The error is raised when a value is assigned to `Err`, due to the missing `ErrorAsOutParameter`. The fix is to move the error handling out of the constructor. 
--- .../Orc/JITLinkRedirectableSymbolManager.h | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h index 52f284c89bdade5..ef42cc5f798fd93 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h @@ -26,12 +26,16 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager, /// Create redirection manager that uses JITLink based implementaion. static Expected> Create(ObjectLinkingLayer &ObjLinkingLayer, JITDylib &JD) { - Error Err = Error::success(); - auto RM = std::unique_ptr( - new JITLinkRedirectableSymbolManager(ObjLinkingLayer, JD, Err)); - if (Err) - return Err; - return std::move(RM); + auto AnonymousPtrCreator(jitlink::getAnonymousPointerCreator( + ObjLinkingLayer.getExecutionSession().getTargetTriple())); + auto PtrJumpStubCreator(jitlink::getPointerJumpStubCreator( + ObjLinkingLayer.getExecutionSession().getTargetTriple())); + if (!AnonymousPtrCreator || !PtrJumpStubCreator) + return make_error("Architecture not supported", + inconvertibleErrorCode()); + return std::unique_ptr( + new JITLinkRedirectableSymbolManager( + ObjLinkingLayer, JD, AnonymousPtrCreator, PtrJumpStubCreator)); } void emitRedirectableSymbols(std::unique_ptr R, @@ -52,18 +56,13 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager, constexpr static StringRef JumpStubTableName = "$IND_JUMP_"; constexpr static StringRef StubPtrTableName = "$__IND_JUMP_PTRS"; - JITLinkRedirectableSymbolManager(ObjectLinkingLayer &ObjLinkingLayer, - JITDylib &JD, Error &Err) + JITLinkRedirectableSymbolManager( + ObjectLinkingLayer &ObjLinkingLayer, JITDylib &JD, + jitlink::AnonymousPointerCreator &AnonymousPtrCreator, + jitlink::PointerJumpStubCreator &PtrJumpStubCreator) : 
ObjLinkingLayer(ObjLinkingLayer), JD(JD), - AnonymousPtrCreator(jitlink::getAnonymousPointerCreator( - ObjLinkingLayer.getExecutionSession().getTargetTriple())), - PtrJumpStubCreator(jitlink::getPointerJumpStubCreator( - ObjLinkingLayer.getExecutionSession().getTargetTriple())) { - if (!AnonymousPtrCreator || !PtrJumpStubCreator) - Err = make_error("Architecture not supported", - inconvertibleErrorCode()); - if (Err) - return; + AnonymousPtrCreator(std::move(AnonymousPtrCreator)), + PtrJumpStubCreator(std::move(PtrJumpStubCreator)) { ObjLinkingLayer.getExecutionSession().registerResourceManager(*this); } From aba39c3974c7e43a83a9d647dca9b67caca8572e Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Fri, 25 Oct 2024 17:40:00 +0200 Subject: [PATCH 028/425] [System] Precommit of test for #112491 (#113704) --- .../SystemZ/vec-elt-insertion.ll | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll new file mode 100644 index 000000000000000..eb8dd72e0304d91 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll @@ -0,0 +1,128 @@ +; RUN: opt < %s -mtriple=s390x-unknown-linux -mcpu=z16 -S -passes=slp-vectorizer \ +; RUN: -pass-remarks-output=%t | FileCheck %s +; RUN: cat %t | FileCheck -check-prefix=REMARK %s +; +; NB! This is a pre-commit version (for #112491) with current codegen and remarks. +; +; Test functions that (at least currently) only gets vectorized if the +; insertion cost for an element load is counted as free. + +; This function needs the free element load to be recognized in SLP +; getGatherCost(). 
+define void @fun0(ptr nocapture %0, double %1) { +; CHECK-LABEL: define void @fun0( +; CHECK: fmul double +; CHECK: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.sqrt.f64( +; CHECK: fmul double +; CHECK: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.sqrt.f64( +; +; REMARK-LABEL: Function: fun0 +; REMARK: Args: +; REMARK-NEXT: - String: 'List vectorization was possible but not beneficial with cost ' +; REMARK-NEXT: - Cost: '0' + + %3 = fmul double %1, 2.000000e+00 + %4 = tail call double @llvm.fmuladd.f64(double %3, double %3, double 0.000000e+00) + %5 = tail call double @llvm.fmuladd.f64(double %3, double %3, double %4) + %sqrt1 = tail call double @llvm.sqrt.f64(double %5) + %6 = load double, ptr %0, align 8 + %7 = fmul double %6, 2.000000e+00 + %8 = tail call double @llvm.fmuladd.f64(double %7, double %7, double 0.000000e+00) + %9 = tail call double @llvm.fmuladd.f64(double %7, double %7, double %8) + %sqrt = tail call double @llvm.sqrt.f64(double %9) + %10 = fadd double %sqrt1, %sqrt + store double %10, ptr %0, align 8 + ret void +} + +; This function needs the element-load to be recognized in SystemZ +; getVectorInstrCost(). 
+define void @fun1(double %0) { +; CHECK-LABEL: define void @fun1( +; CHECK: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: phi double +; CHECK-NEXT: fsub double +; CHECK-NEXT: fsub double +; CHECK-NEXT: fmul double +; CHECK-NEXT: fmul double +; CHECK-NEXT: fsub double +; CHECK-NEXT: fsub double +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: fsub double +; CHECK-NEXT: fsub double +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK-NEXT: call double @llvm.fmuladd.f64( +; CHECK: fcmp olt double +; CHECK-NEXT: fcmp olt double +; CHECK-NEXT: or i1 +; +; REMARK-LABEL: Function: fun1 +; REMARK: Args: +; REMARK: - String: 'List vectorization was possible but not beneficial with cost ' +; REMARK-NEXT: - Cost: '0' + + br label %2 + +2: + %3 = phi double [ poison, %1 ], [ poison, %2 ] + %4 = phi double [ undef, %1 ], [ poison, %2 ] + %5 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ] + %6 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ] + %7 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ] + %8 = phi double [ 0.000000e+00, %1 ], [ %21, %2 ] + %9 = fsub double 0.000000e+00, %8 + %10 = fsub double 0.000000e+00, %7 + %11 = fmul double %9, 0.000000e+00 + %12 = fmul double %10, 0.000000e+00 + %13 = fsub double 0.000000e+00, %6 + %14 = fsub double 0.000000e+00, %5 + %15 = tail call double @llvm.fmuladd.f64(double %13, double %13, double %11) + %16 = tail call double @llvm.fmuladd.f64(double %14, double %14, double %12) + %17 = fsub double 0.000000e+00, %4 + %18 = fsub double 0.000000e+00, %3 + %19 = tail call double @llvm.fmuladd.f64(double %17, double %17, double %15) + %20 = tail call double @llvm.fmuladd.f64(double %18, double %18, double %16) + %21 = load double, ptr null, align 8 + %22 = fcmp olt double %19, %0 + %23 = fcmp olt double %20, 0.000000e+00 + %24 = or i1 %23, %22 + br label %2 +} + +declare double 
@llvm.fmuladd.f64(double, double, double) + +; This should *not* be vectorized as the insertion into the vector isn't free, +; which is recognized in SystemZTTImpl::getScalarizationOverhead(). +define void @fun2(ptr %0, ptr %Dst) { +; CHECK-LABEL: define void @fun2( +; CHECK: insertelement +; CHECK: store <2 x i64> +; +; REMARK-LABEL: Function: fun2 +; REMARK: Args: +; REMARK-NEXT: - String: 'Stores SLP vectorized with cost ' +; REMARK-NEXT: - Cost: '-1' + + %3 = load i64, ptr %0, align 8 + %4 = icmp eq i64 %3, 0 + br i1 %4, label %5, label %6 + +5: + ret void + +6: + %7 = getelementptr i8, ptr %Dst, i64 24 + store i64 %3, ptr %7, align 8 + %8 = getelementptr i8, ptr %Dst, i64 16 + store i64 0, ptr %8, align 8 + br label %5 +} From 81e536ec87a108d012cf9156a2c3fc672fb92155 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 25 Oct 2024 15:43:47 +0000 Subject: [PATCH 029/425] [clang][test] Fix typo in arm-mfp8.cpp New test added by https://github.com/llvm/llvm-project/pull/97277. --- clang/test/AST/arm-mfp8.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/AST/arm-mfp8.cpp b/clang/test/AST/arm-mfp8.cpp index a00d055f7d96794..51bebba067eb9f6 100644 --- a/clang/test/AST/arm-mfp8.cpp +++ b/clang/test/AST/arm-mfp8.cpp @@ -69,7 +69,7 @@ class C1 { //CHECK-NEXT: | | `-CompoundStmt {{.*}} //CHECK-NEXT: | | `-ReturnStmt {{.*}} //CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' -//CHECK-NEXT: | | `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}}8 'arg' '__mfp8':'__MFloat8_t' +//CHECK-NEXT: | | `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}} 'arg' '__mfp8':'__MFloat8_t' //CHECK-NEXT: | `-CXXMethodDecl {{.*}} func2c '__mfp8 (__mfp8)' static implicit-inline //CHECK-NEXT: | |-ParmVarDecl {{.*}} arg '__mfp8':'__MFloat8_t' //CHECK-NEXT: | `-CompoundStmt {{.*}} From 5c20891b2bb60f82dd82a8e90b111f8c13a13ad4 Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Fri, 25 Oct 2024 08:52:56 -0700 Subject: 
[PATCH 030/425] [WebKit Checkers] Allow a guardian CheckedPtr/CheckedRef (#110222) This PR makes WebKit checkers allow a guardian variable which is CheckedPtr or CheckedRef as in addition to RefPtr or Ref. --- .../Checkers/WebKit/ASTUtils.cpp | 16 +++--- .../Checkers/WebKit/PtrTypesSemantics.cpp | 43 +++++++++++++--- .../Checkers/WebKit/PtrTypesSemantics.h | 22 ++++++-- .../WebKit/UncountedCallArgsChecker.cpp | 2 + .../WebKit/UncountedLocalVarsChecker.cpp | 1 + .../Checkers/WebKit/call-args-checked.cpp | 46 +++++++++++++++++ .../Analysis/Checkers/WebKit/mock-types.h | 16 ++++-- .../Checkers/WebKit/uncounted-local-vars.cpp | 51 +++++++++++++++++++ 8 files changed, 177 insertions(+), 20 deletions(-) create mode 100644 clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp index b7b2f8a16f07b31..9d34dfd3cea636b 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp @@ -17,6 +17,10 @@ namespace clang { +bool isSafePtr(clang::CXXRecordDecl *Decl) { + return isRefCounted(Decl) || isCheckedPtr(Decl); +} + bool tryToFindPtrOrigin( const Expr *E, bool StopAtFirstRefCountedObj, std::function callback) { @@ -31,7 +35,7 @@ bool tryToFindPtrOrigin( } if (auto *tempExpr = dyn_cast(E)) { if (auto *C = tempExpr->getConstructor()) { - if (auto *Class = C->getParent(); Class && isRefCounted(Class)) + if (auto *Class = C->getParent(); Class && isSafePtr(Class)) return callback(E, true); break; } @@ -56,7 +60,7 @@ bool tryToFindPtrOrigin( if (StopAtFirstRefCountedObj) { if (auto *ConversionFunc = dyn_cast_or_null(cast->getConversionFunction())) { - if (isCtorOfRefCounted(ConversionFunc)) + if (isCtorOfSafePtr(ConversionFunc)) return callback(E, true); } } @@ -68,7 +72,7 @@ bool tryToFindPtrOrigin( if (auto *call = dyn_cast(E)) { if (auto *memberCall = dyn_cast(call)) { if (auto *decl = 
memberCall->getMethodDecl()) { - std::optional IsGetterOfRefCt = isGetterOfRefCounted(decl); + std::optional IsGetterOfRefCt = isGetterOfSafePtr(decl); if (IsGetterOfRefCt && *IsGetterOfRefCt) { E = memberCall->getImplicitObjectArgument(); if (StopAtFirstRefCountedObj) { @@ -87,7 +91,7 @@ bool tryToFindPtrOrigin( } if (auto *callee = call->getDirectCallee()) { - if (isCtorOfRefCounted(callee)) { + if (isCtorOfRefCounted(callee) || isCtorOfCheckedPtr(callee)) { if (StopAtFirstRefCountedObj) return callback(E, true); @@ -95,7 +99,7 @@ bool tryToFindPtrOrigin( continue; } - if (isRefType(callee->getReturnType())) + if (isSafePtrType(callee->getReturnType())) return callback(E, true); if (isSingleton(callee)) @@ -114,7 +118,7 @@ bool tryToFindPtrOrigin( } if (auto *ObjCMsgExpr = dyn_cast(E)) { if (auto *Method = ObjCMsgExpr->getMethodDecl()) { - if (isRefType(Method->getReturnType())) + if (isSafePtrType(Method->getReturnType())) return callback(E, true); } } diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp index 71440e6d08a1c9a..2293dcf1d4bd643 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp @@ -135,7 +135,16 @@ bool isCtorOfRefCounted(const clang::FunctionDecl *F) { || FunctionName == "Identifier"; } -bool isRefType(const clang::QualType T) { +bool isCtorOfCheckedPtr(const clang::FunctionDecl *F) { + assert(F); + return isCheckedPtr(safeGetName(F)); +} + +bool isCtorOfSafePtr(const clang::FunctionDecl *F) { + return isCtorOfRefCounted(F) || isCtorOfCheckedPtr(F); +} + +bool isSafePtrType(const clang::QualType T) { QualType type = T; while (!type.isNull()) { if (auto *elaboratedT = type->getAs()) { @@ -145,7 +154,7 @@ bool isRefType(const clang::QualType T) { if (auto *specialT = type->getAs()) { if (auto *decl = specialT->getTemplateName().getAsTemplateDecl()) { auto name = 
decl->getNameAsString(); - return isRefType(name); + return isRefType(name) || isCheckedPtr(name); } return false; } @@ -177,6 +186,12 @@ std::optional isUncounted(const CXXRecordDecl* Class) return (*IsRefCountable); } +std::optional isUnchecked(const CXXRecordDecl *Class) { + if (isCheckedPtr(Class)) + return false; // Cheaper than below + return isCheckedPtrCapable(Class); +} + std::optional isUncountedPtr(const QualType T) { if (T->isPointerType() || T->isReferenceType()) { if (auto *CXXRD = T->getPointeeCXXRecordDecl()) @@ -185,8 +200,16 @@ std::optional isUncountedPtr(const QualType T) { return false; } -std::optional isGetterOfRefCounted(const CXXMethodDecl* M) -{ +std::optional isUnsafePtr(const QualType T) { + if (T->isPointerType() || T->isReferenceType()) { + if (auto *CXXRD = T->getPointeeCXXRecordDecl()) { + return isUncounted(CXXRD) || isUnchecked(CXXRD); + } + } + return false; +} + +std::optional isGetterOfSafePtr(const CXXMethodDecl *M) { assert(M); if (isa(M)) { @@ -194,6 +217,9 @@ std::optional isGetterOfRefCounted(const CXXMethodDecl* M) auto className = safeGetName(calleeMethodsClass); auto method = safeGetName(M); + if (isCheckedPtr(className) && (method == "get" || method == "ptr")) + return true; + if ((isRefType(className) && (method == "get" || method == "ptr")) || ((className == "String" || className == "AtomString" || className == "AtomStringImpl" || className == "UniqueString" || @@ -205,7 +231,12 @@ std::optional isGetterOfRefCounted(const CXXMethodDecl* M) // FIXME: Currently allowing any Ref -> whatever cast. 
if (isRefType(className)) { if (auto *maybeRefToRawOperator = dyn_cast(M)) - return isUncountedPtr(maybeRefToRawOperator->getConversionType()); + return isUnsafePtr(maybeRefToRawOperator->getConversionType()); + } + + if (isCheckedPtr(className)) { + if (auto *maybeRefToRawOperator = dyn_cast(M)) + return isUnsafePtr(maybeRefToRawOperator->getConversionType()); } } return false; @@ -448,7 +479,7 @@ class TrivialFunctionAnalysisVisitor if (!Callee) return false; - std::optional IsGetterOfRefCounted = isGetterOfRefCounted(Callee); + std::optional IsGetterOfRefCounted = isGetterOfSafePtr(Callee); if (IsGetterOfRefCounted && *IsGetterOfRefCounted) return true; diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h index 8e6aadf63b6d679..4b41ca96e1df1d3 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h @@ -63,18 +63,30 @@ std::optional isUncounted(const clang::CXXRecordDecl* Class); /// class, false if not, std::nullopt if inconclusive. std::optional isUncountedPtr(const clang::QualType T); -/// \returns true if Name is a RefPtr, Ref, or its variant, false if not. -bool isRefType(const std::string &Name); +/// \returns true if \p T is a RefPtr, Ref, CheckedPtr, CheckedRef, or its +/// variant, false if not. +bool isSafePtrType(const clang::QualType T); /// \returns true if \p F creates ref-countable object from uncounted parameter, /// false if not. bool isCtorOfRefCounted(const clang::FunctionDecl *F); -/// \returns true if \p T is RefPtr, Ref, or its variant, false if not. -bool isRefType(const clang::QualType T); +/// \returns true if \p F creates checked ptr object from uncounted parameter, +/// false if not. +bool isCtorOfCheckedPtr(const clang::FunctionDecl *F); + +/// \returns true if \p F creates ref-countable or checked ptr object from +/// uncounted parameter, false if not. 
+bool isCtorOfSafePtr(const clang::FunctionDecl *F); + +/// \returns true if \p Name is RefPtr, Ref, or its variant, false if not. +bool isRefType(const std::string &Name); + +/// \returns true if \p Name is CheckedRef or CheckedPtr, false if not. +bool isCheckedPtr(const std::string &Name); /// \returns true if \p M is getter of a ref-counted class, false if not. -std::optional isGetterOfRefCounted(const clang::CXXMethodDecl* Method); +std::optional isGetterOfSafePtr(const clang::CXXMethodDecl *Method); /// \returns true if \p F is a conversion between ref-countable or ref-counted /// pointer types. diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp index cea3503fa2c314d..1a5a7309a54f167 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp @@ -96,6 +96,8 @@ class UncountedCallArgsChecker auto name = safeGetName(MD); if (name == "ref" || name == "deref") return; + if (name == "incrementPtrCount" || name == "decrementPtrCount") + return; } auto *E = MemberCallExpr->getImplicitObjectArgument(); QualType ArgType = MemberCallExpr->getObjectType().getCanonicalType(); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp index 81d21100de878db..5cdf047738abcb2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp @@ -227,6 +227,7 @@ class UncountedLocalVarsChecker if (MaybeGuardianArgCXXRecord) { if (MaybeGuardian->isLocalVarDecl() && (isRefCounted(MaybeGuardianArgCXXRecord) || + isCheckedPtr(MaybeGuardianArgCXXRecord) || isRefcountedStringsHack(MaybeGuardian)) && isGuardedScopeEmbeddedInGuardianScope( V, MaybeGuardian)) diff --git 
a/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp b/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp new file mode 100644 index 000000000000000..49b6bfcd7cadfdc --- /dev/null +++ b/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp @@ -0,0 +1,46 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s + +#include "mock-types.h" + +RefCountableAndCheckable* makeObj(); +CheckedRef makeObjChecked(); +void someFunction(RefCountableAndCheckable*); + +namespace call_args_unchecked_uncounted { + +static void foo() { + someFunction(makeObj()); + // expected-warning@-1{{Call argument is uncounted and unsafe [alpha.webkit.UncountedCallArgsChecker]}} +} + +} // namespace call_args_checked + +namespace call_args_checked { + +static void foo() { + CheckedPtr ptr = makeObj(); + someFunction(ptr.get()); +} + +static void bar() { + someFunction(CheckedPtr { makeObj() }.get()); +} + +static void baz() { + someFunction(makeObjChecked().ptr()); +} + +} // namespace call_args_checked + +namespace call_args_default { + +void someFunction(RefCountableAndCheckable* = makeObj()); +// expected-warning@-1{{Call argument is uncounted and unsafe [alpha.webkit.UncountedCallArgsChecker]}} +void otherFunction(RefCountableAndCheckable* = makeObjChecked().ptr()); + +void foo() { + someFunction(); + otherFunction(); +} + +} diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h b/clang/test/Analysis/Checkers/WebKit/mock-types.h index 933b4c5e62a79cc..8d8a90f0afae0e1 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-types.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h @@ -114,8 +114,8 @@ template struct CheckedRef { public: CheckedRef() : t{} {}; - CheckedRef(T &t) : t(t) { t->incrementPtrCount(); } - CheckedRef(const CheckedRef& o) : t(o.t) { if (t) t->incrementPtrCount(); } + CheckedRef(T &t) : t(&t) { t.incrementPtrCount(); } + CheckedRef(const CheckedRef &o) : t(o.t) { if (t) t->incrementPtrCount(); } 
~CheckedRef() { if (t) t->decrementPtrCount(); } T &get() { return *t; } T *ptr() { return t; } @@ -135,7 +135,7 @@ template struct CheckedPtr { if (t) t->incrementPtrCount(); } - CheckedPtr(Ref&& o) + CheckedPtr(Ref &&o) : t(o.leakRef()) { } ~CheckedPtr() { @@ -156,4 +156,14 @@ class CheckedObj { void decrementPtrCount(); }; +class RefCountableAndCheckable { +public: + void incrementPtrCount() const; + void decrementPtrCount() const; + void ref() const; + void deref() const; + void method(); + int trivial() { return 0; } +}; + #endif diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp index b5f6b8535bf4181..1c0df42cdda663c 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp @@ -290,6 +290,57 @@ void foo() { } // namespace local_assignment_to_global +namespace local_refcountable_checkable_object { + +RefCountableAndCheckable* provide_obj(); + +void local_raw_ptr() { + RefCountableAndCheckable* a = nullptr; + // expected-warning@-1{{Local variable 'a' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + a = provide_obj(); + a->method(); +} + +void local_checked_ptr() { + CheckedPtr a = nullptr; + a = provide_obj(); + a->method(); +} + +void local_var_with_guardian_checked_ptr() { + CheckedPtr a = provide_obj(); + { + auto* b = a.get(); + b->method(); + } +} + +void local_var_with_guardian_checked_ptr_with_assignment() { + CheckedPtr a = provide_obj(); + { + RefCountableAndCheckable* b = a.get(); + // expected-warning@-1{{Local variable 'b' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + b = provide_obj(); + b->method(); + } +} + +void local_var_with_guardian_checked_ref() { + CheckedRef a = *provide_obj(); + { + RefCountableAndCheckable& b = a; + b.method(); + } +} + +void static_var() { + static RefCountableAndCheckable* a = nullptr; + // 
expected-warning@-1{{Static local variable 'a' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + a = provide_obj(); +} + +} // namespace local_refcountable_checkable_object + namespace local_var_in_recursive_function { struct TreeNode { From 1f2b7ae6d78906df4f0c06961e3c9ed227986acf Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 25 Oct 2024 12:28:55 -0400 Subject: [PATCH 031/425] [libc++] Refactor locale_guard (#113694) Rename __libcpp_locale_guard to just __locale_guard, since there's no reason for it to have __libcpp_ in its name -- it's just an internal utility. Also, define __locale_guard unconditionally of _LIBCPP_LOCALE__L_EXTENSIONS, since that header is only used on Windows (where it has a custom definition) or from bsd_locale_fallbacks.h, which is only included when the L extensions are not provided. --- libcxx/include/CMakeLists.txt | 2 +- .../locale_base_api/bsd_locale_fallbacks.h | 30 ++++++------- .../{locale_base_api => }/locale_guard.h | 42 +++++++++---------- libcxx/include/module.modulemap | 2 +- libcxx/src/iostream.cpp | 4 +- libcxx/src/support/win32/locale_win32.cpp | 34 +++++++-------- 6 files changed, 56 insertions(+), 58 deletions(-) rename libcxx/include/__locale_dir/{locale_base_api => }/locale_guard.h (73%) diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 975adc03ec81da0..63aa74e09bb1a27 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -496,11 +496,11 @@ set(files __locale_dir/locale_base_api/bsd_locale_fallbacks.h __locale_dir/locale_base_api/fuchsia.h __locale_dir/locale_base_api/ibm.h - __locale_dir/locale_base_api/locale_guard.h __locale_dir/locale_base_api/musl.h __locale_dir/locale_base_api/newlib.h __locale_dir/locale_base_api/openbsd.h __locale_dir/locale_base_api/win32.h + __locale_dir/locale_guard.h __math/abs.h __math/copysign.h __math/error_functions.h diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h 
b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h index 5f99c7aea02a96a..ae2db6ae70bebcb 100644 --- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h +++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h @@ -13,7 +13,7 @@ #ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H #define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H -#include <__locale_dir/locale_base_api/locale_guard.h> +#include <__locale_dir/locale_guard.h> #include #include #include @@ -29,64 +29,64 @@ _LIBCPP_BEGIN_NAMESPACE_STD inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __libcpp_mb_cur_max_l(locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return MB_CUR_MAX; } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI wint_t __libcpp_btowc_l(int __c, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return btowc(__c); } inline _LIBCPP_HIDE_FROM_ABI int __libcpp_wctob_l(wint_t __c, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return wctob(__c); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_wcsnrtombs_l(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return wcsnrtombs(__dest, __src, __nwc, __len, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_wcrtomb_l(char* __s, wchar_t __wc, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return wcrtomb(__s, __wc, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbsnrtowcs_l(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbsnrtowcs(__dest, __src, __nms, __len, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t 
__libcpp_mbrtowc_l(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbrtowc(__pwc, __s, __n, __ps); } inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mbtowc_l(wchar_t* __pwc, const char* __pmb, size_t __max, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbtowc(__pwc, __pmb, __max); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbrlen_l(const char* __s, size_t __n, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbrlen(__s, __n, __ps); } #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI lconv* __libcpp_localeconv_l(locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return localeconv(); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbsrtowcs_l(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbsrtowcs(__dest, __src, __len, __ps); } #endif @@ -95,7 +95,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __libcpp_snprintf_l( char* __s, size_t __n, locale_t __l, const char* __format, ...) { va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vsnprintf(__s, __n, __format, __va); va_end(__va); return __res; @@ -105,7 +105,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l( char** __s, locale_t __l, const char* __format, ...) 
{ va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vasprintf(__s, __format, __va); va_end(__va); return __res; @@ -115,7 +115,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l( const char* __s, locale_t __l, const char* __format, ...) { va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vsscanf(__s, __format, __va); va_end(__va); return __res; diff --git a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__locale_dir/locale_guard.h similarity index 73% rename from libcxx/include/__locale_dir/locale_base_api/locale_guard.h rename to libcxx/include/__locale_dir/locale_guard.h index 7d15f2d253adc39..e0c414c001c41f1 100644 --- a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h +++ b/libcxx/include/__locale_dir/locale_guard.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H -#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H +#define _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H #include <__config> #include <__locale> // for locale_t @@ -19,23 +19,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if !defined(_LIBCPP_LOCALE__L_EXTENSIONS) -struct __libcpp_locale_guard { - _LIBCPP_HIDE_FROM_ABI __libcpp_locale_guard(locale_t& __loc) : __old_loc_(uselocale(__loc)) {} - - _LIBCPP_HIDE_FROM_ABI ~__libcpp_locale_guard() { - if (__old_loc_) - uselocale(__old_loc_); - } - - locale_t __old_loc_; - - __libcpp_locale_guard(__libcpp_locale_guard const&) = delete; - __libcpp_locale_guard& operator=(__libcpp_locale_guard const&) = delete; -}; -#elif defined(_LIBCPP_MSVCRT_LIKE) -struct __libcpp_locale_guard { - __libcpp_locale_guard(locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { +#if 
defined(_LIBCPP_MSVCRT_LIKE) +struct __locale_guard { + __locale_guard(locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { // Setting the locale can be expensive even when the locale given is // already the current locale, so do an explicit check to see if the // current locale is already the one we want. @@ -51,7 +37,7 @@ struct __libcpp_locale_guard { __setlocale(__l.__get_locale()); } } - ~__libcpp_locale_guard() { + ~__locale_guard() { // The CRT documentation doesn't explicitly say, but setlocale() does the // right thing when given a semicolon-separated list of locale settings // for the different categories in the same format as returned by @@ -71,8 +57,22 @@ struct __libcpp_locale_guard { int __status; char* __locale_all = nullptr; }; +#else +struct __locale_guard { + _LIBCPP_HIDE_FROM_ABI __locale_guard(locale_t& __loc) : __old_loc_(uselocale(__loc)) {} + + _LIBCPP_HIDE_FROM_ABI ~__locale_guard() { + if (__old_loc_) + uselocale(__old_loc_); + } + + locale_t __old_loc_; + + __locale_guard(__locale_guard const&) = delete; + __locale_guard& operator=(__locale_guard const&) = delete; +}; #endif _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_LOCALE_GUARD_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index b429d7cff702b81..c79070c318759db 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1443,7 +1443,7 @@ module std [system] { module locale { header "locale" header "__locale_dir/locale_base_api.h" - header "__locale_dir/locale_base_api/locale_guard.h" + header "__locale_dir/locale_guard.h" module locale_base_api { textual header "__locale_dir/locale_base_api/android.h" textual header "__locale_dir/locale_base_api/bsd_locale_defaults.h" diff --git a/libcxx/src/iostream.cpp b/libcxx/src/iostream.cpp index c5ad77a01916084..48d2fdb866a332c 100644 --- a/libcxx/src/iostream.cpp +++ 
b/libcxx/src/iostream.cpp @@ -12,7 +12,7 @@ #include #ifdef _LIBCPP_MSVCRT_LIKE -# include <__locale_dir/locale_base_api/locale_guard.h> +# include <__locale_dir/locale_guard.h> #endif #define _str(s) #s @@ -109,7 +109,7 @@ static void force_locale_initialization() { static bool once = []() { auto loc = newlocale(LC_ALL_MASK, "C", 0); { - __libcpp_locale_guard g(loc); // forces initialization of locale TLS + __locale_guard g(loc); // forces initialization of locale TLS ((void)g); } freelocale(loc); diff --git a/libcxx/src/support/win32/locale_win32.cpp b/libcxx/src/support/win32/locale_win32.cpp index 57ef94932ba0a76..2a08e97b8645b40 100644 --- a/libcxx/src/support/win32/locale_win32.cpp +++ b/libcxx/src/support/win32/locale_win32.cpp @@ -11,12 +11,10 @@ #include #include -#include <__locale_dir/locale_base_api/locale_guard.h> +#include <__locale_dir/locale_guard.h> int __libcpp_vasprintf(char** sptr, const char* __restrict fmt, va_list ap); -using std::__libcpp_locale_guard; - // FIXME: base and mask currently unused. 
Needs manual work to construct the new locale locale_t newlocale(int /*mask*/, const char* locale, locale_t /*base*/) { return {_create_locale(LC_ALL, locale), locale}; @@ -26,33 +24,33 @@ decltype(MB_CUR_MAX) MB_CUR_MAX_L(locale_t __l) { #if defined(_LIBCPP_MSVCRT) return ___mb_cur_max_l_func(__l); #else - __libcpp_locale_guard __current(__l); + std::__locale_guard __current(__l); return MB_CUR_MAX; #endif } lconv* localeconv_l(locale_t& loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); lconv* lc = localeconv(); if (!lc) return lc; return loc.__store_lconv(lc); } size_t mbrlen_l(const char* __restrict s, size_t n, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return mbrlen(s, n, ps); } size_t mbsrtowcs_l(wchar_t* __restrict dst, const char** __restrict src, size_t len, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return mbsrtowcs(dst, src, len, ps); } size_t wcrtomb_l(char* __restrict s, wchar_t wc, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return wcrtomb(s, wc, ps); } size_t mbrtowc_l(wchar_t* __restrict pwc, const char* __restrict s, size_t n, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return mbrtowc(pwc, s, n, ps); } size_t mbsnrtowcs_l(wchar_t* __restrict dst, @@ -61,7 +59,7 @@ size_t mbsnrtowcs_l(wchar_t* __restrict dst, size_t len, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return mbsnrtowcs(dst, src, nms, len, ps); } size_t wcsnrtombs_l(char* __restrict dst, @@ -70,15 +68,15 @@ size_t wcsnrtombs_l(char* __restrict dst, size_t len, mbstate_t* __restrict ps, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); 
return wcsnrtombs(dst, src, nwc, len, ps); } wint_t btowc_l(int c, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return btowc(c); } int wctob_l(wint_t c, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return wctob(c); } @@ -90,7 +88,7 @@ int snprintf_l(char* ret, size_t n, locale_t loc, const char* format, ...) { int result = __stdio_common_vsprintf( _CRT_INTERNAL_LOCAL_PRINTF_OPTIONS | _CRT_INTERNAL_PRINTF_STANDARD_SNPRINTF_BEHAVIOR, ret, n, format, loc, ap); #else - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") int result = vsnprintf(ret, n, format, ap); @@ -108,25 +106,25 @@ int asprintf_l(char** ret, locale_t loc, const char* format, ...) { return result; } int vasprintf_l(char** ret, locale_t loc, const char* format, va_list ap) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return __libcpp_vasprintf(ret, format, ap); } #if !defined(_LIBCPP_MSVCRT) float strtof_l(const char* nptr, char** endptr, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return strtof(nptr, endptr); } long double strtold_l(const char* nptr, char** endptr, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return strtold(nptr, endptr); } #endif #if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800 size_t strftime_l(char* ret, size_t n, const char* format, const struct tm* tm, locale_t loc) { - __libcpp_locale_guard __current(loc); + std::__locale_guard __current(loc); return strftime(ret, n, format, tm); } #endif From ba81e1949a4f25216e2b3ea3a1507a52db88562a Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 25 Oct 2024 17:32:25 +0100 Subject: [PATCH 032/425] [AArch64] Add assembly/disassembly for BFMOP4{A,S} (widening) instructions (#113203) The new 
instructions are described in https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 3 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 37 +++ .../SME2p2/bfmop4as-widening-diagnostics.s | 220 ++++++++++++++++++ .../MC/AArch64/SME2p2/bfmop4as-widening.s | 178 ++++++++++++++ 4 files changed, 438 insertions(+) create mode 100644 llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 6044b5bb7d81511..b763aa15a7c3f15 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1004,6 +1004,9 @@ let Predicates = [HasSME2p2] in { def FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">; def FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">; def BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, "bftmopa">; + + defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a">; + defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s">; } // [HasSME2p2] let Predicates = [HasSME2p2, HasSMEB16B16] in { diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 08929ed5616b2c8..4cfe18eddf481cb 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5188,3 +5188,40 @@ class sme2_luti4_vector_vg4_strided sz, bits<2> op, string mnemonic> let Inst{3-2} = 0b00; let Inst{1-0} = Zd{1-0}; } + +class sme2_bf16_fp32_quarter_tile_outer_product + : I<(outs TileOp32:$ZAda), + (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000001000; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + 
let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3-2} = 0b00; + let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_bfmop4as_widening { + // Single vectors + def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; +} diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s new file mode 100644 index 000000000000000..5906bcb07f15d5a --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s @@ -0,0 +1,220 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2 < %s 2>&1 | FileCheck %s + +// BFMOP4A + +// Single vectors + +bfmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.s, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, z12.h, z17.h +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +bfmop4a za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.s, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.s, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +bfmop4a za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.s, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + 
+bfmop4a za0.s, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +bfmop4a za0.s, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.s, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +bfmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.s, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.s, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.s, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + + +// BFMOP4S + +// Single vectors + +bfmop4s za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.s, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +bfmop4s za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected 
za[0-3].s + +bfmop4s za4.s, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.s, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +bfmop4s za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.s, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +bfmop4s za0.s, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a 
multiple of 2 and with matching element types + +bfmop4s za0.s, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.s, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +bfmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.s, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.s, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.s, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching 
element types diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s new file mode 100644 index 000000000000000..40d08e503c8bb32 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s @@ -0,0 +1,178 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// BFMOP4A + +// Single vectors + +bfmop4a za0.s, z0.h, z16.h // 10000001-00000000-00000000-00000000 +// CHECK-INST: bfmop4a za0.s, z0.h, z16.h +// CHECK-ENCODING: [0x00,0x00,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81000000 + +bfmop4a za3.s, z14.h, z30.h // 10000001-00001110-00000001-11000011 +// CHECK-INST: bfmop4a za3.s, z14.h, z30.h +// CHECK-ENCODING: [0xc3,0x01,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810e01c3 + +bfmop4a za1.s, z10.h, z20.h // 10000001-00000100-00000001-01000001 +// CHECK-INST: bfmop4a za1.s, z10.h, z20.h +// CHECK-ENCODING: [0x41,0x01,0x04,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81040141 + +// Single and multiple vectors + +bfmop4a za0.s, z0.h, {z16.h-z17.h} // 
10000001-00010000-00000000-00000000 +// CHECK-INST: bfmop4a za0.s, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x00,0x00,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81100000 + +bfmop4a za3.s, z14.h, {z30.h-z31.h} // 10000001-00011110-00000001-11000011 +// CHECK-INST: bfmop4a za3.s, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xc3,0x01,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811e01c3 + +bfmop4a za2.s, z12.h, {z24.h-z25.h} // 10000001-00011000-00000001-10000010 +// CHECK-INST: bfmop4a za2.s, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x82,0x01,0x18,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81180182 + +// Multiple and single vectors + +bfmop4a za0.s, {z0.h-z1.h}, z16.h // 10000001-00000000-00000010-00000000 +// CHECK-INST: bfmop4a za0.s, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x00,0x02,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81000200 + +bfmop4a za3.s, {z14.h-z15.h}, z30.h // 10000001-00001110-00000011-11000011 +// CHECK-INST: bfmop4a za3.s, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xc3,0x03,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810e03c3 + +bfmop4a za2.s, {z12.h-z13.h}, z28.h // 10000001-00001100-00000011-10000010 +// CHECK-INST: bfmop4a za2.s, { z12.h, z13.h }, z28.h +// CHECK-ENCODING: [0x82,0x03,0x0c,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810c0382 + +// Multiple vectors + +bfmop4a za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00010000-00000010-00000000 +// CHECK-INST: bfmop4a za0.s, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x00,0x02,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81100200 + +bfmop4a za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00011110-00000011-11000011 +// CHECK-INST: bfmop4a za3.s, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc3,0x03,0x1e,0x81] +// CHECK-ERROR: instruction 
requires: sme2p2 +// CHECK-UNKNOWN: 811e03c3 + +bfmop4a za2.s, {z12.h-z13.h}, {z26.h-z27.h} // 10000001-00011010-00000011-10000010 +// CHECK-INST: bfmop4a za2.s, { z12.h, z13.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x82,0x03,0x1a,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811a0382 + + +// BFMOP4S + +// Single vectors + +bfmop4s za0.s, z0.h, z16.h // 10000001-00000000-00000000-00010000 +// CHECK-INST: bfmop4s za0.s, z0.h, z16.h +// CHECK-ENCODING: [0x10,0x00,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81000010 + +bfmop4s za3.s, z14.h, z30.h // 10000001-00001110-00000001-11010011 +// CHECK-INST: bfmop4s za3.s, z14.h, z30.h +// CHECK-ENCODING: [0xd3,0x01,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810e01d3 + +bfmop4s za1.s, z10.h, z20.h // 10000001-00000100-00000001-01010001 +// CHECK-INST: bfmop4s za1.s, z10.h, z20.h +// CHECK-ENCODING: [0x51,0x01,0x04,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81040151 + +// Single and multiple vectors + +bfmop4s za0.s, z0.h, {z16.h-z17.h} // 10000001-00010000-00000000-00010000 +// CHECK-INST: bfmop4s za0.s, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x10,0x00,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81100010 + +bfmop4s za3.s, z14.h, {z30.h-z31.h} // 10000001-00011110-00000001-11010011 +// CHECK-INST: bfmop4s za3.s, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xd3,0x01,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811e01d3 + +bfmop4s za2.s, z12.h, {z24.h-z25.h} // 10000001-00011000-00000001-10010010 +// CHECK-INST: bfmop4s za2.s, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x92,0x01,0x18,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81180192 + +// Multiple and single vectors + +bfmop4s za0.s, {z0.h-z1.h}, z16.h // 10000001-00000000-00000010-00010000 +// CHECK-INST: bfmop4s za0.s, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: 
[0x10,0x02,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81000210 + +bfmop4s za3.s, {z14.h-z15.h}, z30.h // 10000001-00001110-00000011-11010011 +// CHECK-INST: bfmop4s za3.s, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xd3,0x03,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810e03d3 + +bfmop4s za2.s, {z12.h-z13.h}, z28.h // 10000001-00001100-00000011-10010010 +// CHECK-INST: bfmop4s za2.s, { z12.h, z13.h }, z28.h +// CHECK-ENCODING: [0x92,0x03,0x0c,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 810c0392 + +// Multiple vectors + +bfmop4s za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00010000-00000010-00010000 +// CHECK-INST: bfmop4s za0.s, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x10,0x02,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81100210 + +bfmop4s za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00011110-00000011-11010011 +// CHECK-INST: bfmop4s za3.s, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd3,0x03,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811e03d3 + +bfmop4s za2.s, {z12.h-z13.h}, {z26.h-z27.h} // 10000001-00011010-00000011-10010010 +// CHECK-INST: bfmop4s za2.s, { z12.h, z13.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x92,0x03,0x1a,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 811a0392 From 4161ca2092d3b92034515190f577aa200ec615bf Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Fri, 25 Oct 2024 14:54:56 +0100 Subject: [PATCH 033/425] [NFC][AArch64][LLVM] Update ReleaseNotes.md with Armv9.6-A (2024) arch extensions --- llvm/docs/ReleaseNotes.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 7cca9116a513451..be51b0af56ddbf7 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -110,6 +110,9 @@ Changes to the AArch64 Backend the required alignment space with a sequence 
of `0x0` bytes (the requested fill value) rather than NOPs. +* Assembler/disassembler support has been added for Armv9.6-A (2024) + architecture extensions. + Changes to the AMDGPU Backend ----------------------------- From 2ec5c69b6872b8b474f3d37b9125d3d57d144d1b Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Fri, 25 Oct 2024 09:42:01 -0700 Subject: [PATCH 034/425] Revert "[Sanitizers] Intercept timer_create" (#113710) Reverts llvm/llvm-project#112285 --- .../lib/hwasan/hwasan_platform_interceptors.h | 3 --- compiler-rt/lib/msan/tests/msan_test.cpp | 23 ------------------- .../sanitizer_common_interceptors.inc | 19 --------------- .../sanitizer_platform_interceptors.h | 3 --- .../sanitizer_platform_limits_posix.h | 4 ---- 5 files changed, 52 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h index e8011014c2331d7..d92b51052194275 100644 --- a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h +++ b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h @@ -200,9 +200,6 @@ #undef SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID #define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID 0 -#undef SANITIZER_INTERCEPT_TIMER_CREATE -#define SANITIZER_INTERCEPT_TIMER_CREATE 0 - #undef SANITIZER_INTERCEPT_GETITIMER #define SANITIZER_INTERCEPT_GETITIMER 0 diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index ad265acf4c1e39a..41b99fabe84f478 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -4881,27 +4881,4 @@ TEST(MemorySanitizer, throw_catch) { // pass } } - -#if defined(__linux__) -TEST(MemorySanitizer, timer_create) { - timer_t timer; - EXPECT_POISONED(timer); - int res = timer_create(CLOCK_REALTIME, nullptr, &timer); - ASSERT_EQ(0, res); - EXPECT_NOT_POISONED(timer); - - // Make sure the timer is usable. 
- struct itimerspec cur_value {}; - cur_value.it_value.tv_sec = 1; - EXPECT_EQ(0, timer_settime(timer, 0, &cur_value, nullptr)); - - timer_t timer2; - EXPECT_POISONED(timer2); - // Use an invalid clock_id to make timer_create fail. - res = timer_create(INT_MAX, nullptr, &timer2); - ASSERT_EQ(-1, res); - EXPECT_POISONED(timer2); - timer_delete(timer); -} -#endif } // namespace diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 211f9f70d7e4c6c..b8627f8557afe29 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -2289,24 +2289,6 @@ INTERCEPTOR(int, pthread_getcpuclockid, uptr thread, #define INIT_CLOCK_GETCPUCLOCKID #endif -#if SANITIZER_INTERCEPT_TIMER_CREATE -INTERCEPTOR(int, timer_create, __sanitizer_clockid_t clockid, void *sevp, - __sanitizer_timer_t *timer) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, timer_create, clockid, sevp, timer); - int res = REAL(timer_create)(clockid, sevp, timer); - if (!res && timer) { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, timer, sizeof *timer); - } - return res; -} - -# define INIT_TIMER_CREATE \ - COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(timer_create, "GLIBC_2.3.3"); -#else -# define INIT_TIMER_CREATE -#endif - #if SANITIZER_INTERCEPT_GETITIMER INTERCEPTOR(int, getitimer, int which, void *curr_value) { void *ctx; @@ -10284,7 +10266,6 @@ static void InitializeCommonInterceptors() { INIT_SETPWENT; INIT_CLOCK_GETTIME; INIT_CLOCK_GETCPUCLOCKID; - INIT_TIMER_CREATE; INIT_GETITIMER; INIT_TIME; INIT_GLOB; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 36fafdc642642bf..6959a6d52d604e0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ 
b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -237,9 +237,6 @@ (SI_FREEBSD || SI_NETBSD || SI_LINUX || SI_SOLARIS) #define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID \ (SI_LINUX || SI_FREEBSD || SI_NETBSD) -// TODO: This should be SI_POSIX, adding Linux first until I have time -// to verify all timer_t typedefs on other platforms. -#define SANITIZER_INTERCEPT_TIMER_CREATE SI_LINUX #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX #define SANITIZER_INTERCEPT_TIME SI_POSIX #define SANITIZER_INTERCEPT_GLOB (SI_GLIBC || SI_SOLARIS) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index b4ccf7b3d7bef48..e8c81aa8e281637 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -1517,10 +1517,6 @@ extern const int si_SEGV_ACCERR; #define SIGACTION_SYMNAME sigaction -# if SANITIZER_LINUX -typedef void *__sanitizer_timer_t; -# endif - #endif // SANITIZER_LINUX || SANITIZER_APPLE #endif From 9ea6fcd02b172ec12c9d4b9157d4a37765d83421 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 25 Oct 2024 16:47:08 +0000 Subject: [PATCH 035/425] [gn build] Port 1f2b7ae6d789 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 1630c8004d31575..0586704850a51b2 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -568,11 +568,11 @@ if (current_toolchain == default_toolchain) { "__locale_dir/locale_base_api/bsd_locale_fallbacks.h", "__locale_dir/locale_base_api/fuchsia.h", "__locale_dir/locale_base_api/ibm.h", - "__locale_dir/locale_base_api/locale_guard.h", "__locale_dir/locale_base_api/musl.h", "__locale_dir/locale_base_api/newlib.h", 
"__locale_dir/locale_base_api/openbsd.h", "__locale_dir/locale_base_api/win32.h", + "__locale_dir/locale_guard.h", "__math/abs.h", "__math/copysign.h", "__math/error_functions.h", From 305a1ceae371b482375545650ba9fd9e4c165157 Mon Sep 17 00:00:00 2001 From: Alexander Richardson Date: Fri, 25 Oct 2024 10:02:40 -0700 Subject: [PATCH 036/425] [DataLayout] Refactor storage of non-integral address spaces Instead of storing this as a separate array of non-integral pointers, add it to the PointerSpec class instead. This will allow for future simplifications such as splitting the non-integral property into multiple distinct ones: relocatable (i.e. non-stable representation) and non-integral representation (i.e. pointers with metadata). Reviewed By: arsenm Pull Request: https://github.com/llvm/llvm-project/pull/105734 --- llvm/include/llvm/IR/DataLayout.h | 28 ++++++++++++++++----------- llvm/lib/IR/DataLayout.cpp | 32 ++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 8f7ab2f9df389ef..93bd519f5727d80 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -78,7 +78,11 @@ class DataLayout { Align ABIAlign; Align PrefAlign; uint32_t IndexBitWidth; - + /// Pointers in this address space don't have a well-defined bitwise + /// representation (e.g. may be relocated by a copying garbage collector). + /// Additionally, they may also be non-integral (i.e. containing additional + /// metadata such as bounds information/permissions). + bool IsNonIntegral; bool operator==(const PointerSpec &Other) const; }; @@ -133,10 +137,6 @@ class DataLayout { // The StructType -> StructLayout map. mutable void *LayoutMap = nullptr; - /// Pointers in these address spaces are non-integral, and don't have a - /// well-defined bitwise representation. 
- SmallVector NonIntegralAddressSpaces; - /// Sets or updates the specification for the given primitive type. void setPrimitiveSpec(char Specifier, uint32_t BitWidth, Align ABIAlign, Align PrefAlign); @@ -147,7 +147,8 @@ class DataLayout { /// Sets or updates the specification for pointer in the given address space. void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, - Align PrefAlign, uint32_t IndexBitWidth); + Align PrefAlign, uint32_t IndexBitWidth, + bool IsNonIntegral); /// Internal helper to get alignment for integer of given bitwidth. Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const; @@ -165,7 +166,8 @@ class DataLayout { Error parsePointerSpec(StringRef Spec); /// Attempts to parse a single specification. - Error parseSpecification(StringRef Spec); + Error parseSpecification(StringRef Spec, + SmallVectorImpl &NonIntegralAddressSpaces); /// Attempts to parse a data layout string. Error parseLayoutString(StringRef LayoutString); @@ -337,13 +339,17 @@ class DataLayout { /// Return the address spaces containing non-integral pointers. Pointers in /// this address space don't have a well-defined bitwise representation. 
- ArrayRef getNonIntegralAddressSpaces() const { - return NonIntegralAddressSpaces; + SmallVector getNonIntegralAddressSpaces() const { + SmallVector AddrSpaces; + for (const PointerSpec &PS : PointerSpecs) { + if (PS.IsNonIntegral) + AddrSpaces.push_back(PS.AddrSpace); + } + return AddrSpaces; } bool isNonIntegralAddressSpace(unsigned AddrSpace) const { - ArrayRef NonIntegralSpaces = getNonIntegralAddressSpaces(); - return is_contained(NonIntegralSpaces, AddrSpace); + return getPointerSpec(AddrSpace).IsNonIntegral; } bool isNonIntegralPointerType(PointerType *PT) const { diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index d295d1f5785eb9d..a4af0ead07cf616 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -151,7 +151,8 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const { bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const { return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth && ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign && - IndexBitWidth == Other.IndexBitWidth; + IndexBitWidth == Other.IndexBitWidth && + IsNonIntegral == Other.IsNonIntegral; } namespace { @@ -206,7 +207,8 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = { // Default pointer type specifications. constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = { - {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64} // p0:64:64:64:64 + // p0:64:64:64:64 + {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false}, }; DataLayout::DataLayout() @@ -239,13 +241,11 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) { PointerSpecs = Other.PointerSpecs; StructABIAlignment = Other.StructABIAlignment; StructPrefAlignment = Other.StructPrefAlignment; - NonIntegralAddressSpaces = Other.NonIntegralAddressSpaces; return *this; } bool DataLayout::operator==(const DataLayout &Other) const { // NOTE: StringRepresentation might differ, it is not canonicalized. 
- // FIXME: NonIntegralAddressSpaces isn't compared. return BigEndian == Other.BigEndian && AllocaAddrSpace == Other.AllocaAddrSpace && ProgramAddrSpace == Other.ProgramAddrSpace && @@ -454,11 +454,13 @@ Error DataLayout::parsePointerSpec(StringRef Spec) { return createStringError( "index size cannot be larger than the pointer size"); - setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth); + setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth, + false); return Error::success(); } -Error DataLayout::parseSpecification(StringRef Spec) { +Error DataLayout::parseSpecification( + StringRef Spec, SmallVectorImpl &NonIntegralAddressSpaces) { // The "ni" specifier is the only two-character specifier. Handle it first. if (Spec.starts_with("ni")) { // ni:
<address space>[:<address space>
]... @@ -614,12 +616,23 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) { // Split the data layout string into specifications separated by '-' and // parse each specification individually, updating internal data structures. + SmallVector NonIntegralAddressSpaces; for (StringRef Spec : split(LayoutString, '-')) { if (Spec.empty()) return createStringError("empty specification is not allowed"); - if (Error Err = parseSpecification(Spec)) + if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces)) return Err; } + // Mark all address spaces that were qualified as non-integral now. This has + // to be done later since the non-integral property is not part of the data + // layout pointer specification. + for (unsigned AS : NonIntegralAddressSpaces) { + // If there is no special spec for a given AS, getPointerSpec(AS) returns + // the spec for AS0, and we then update that to mark it non-integral. + const PointerSpec &PS = getPointerSpec(AS); + setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth, + true); + } return Error::success(); } @@ -666,16 +679,17 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const { void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, Align PrefAlign, - uint32_t IndexBitWidth) { + uint32_t IndexBitWidth, bool IsNonIntegral) { auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace()); if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) { PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign, - IndexBitWidth}); + IndexBitWidth, IsNonIntegral}); } else { I->BitWidth = BitWidth; I->ABIAlign = ABIAlign; I->PrefAlign = PrefAlign; I->IndexBitWidth = IndexBitWidth; + I->IsNonIntegral = IsNonIntegral; } } From 9d88543301f262e584a36ea969237a2cf054328b Mon Sep 17 00:00:00 2001 From: Abhina Sree Date: Fri, 25 Oct 2024 13:06:02 -0400 Subject: [PATCH 037/425] [AIX] Use internal lit shell for TableGen instead of a global setting (#113627) 
This is to address the latest lit regressions https://lab.llvm.org/buildbot/#/builders/64/builds/1285 caused by using the internal lit shell. This change will limit using the internal lit shell to TableGen on AIX so we do not hit these regressions. --- llvm/test/TableGen/lit.local.cfg | 8 ++++++++ llvm/utils/lit/lit/llvm/config.py | 7 ------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/test/TableGen/lit.local.cfg b/llvm/test/TableGen/lit.local.cfg index 0e827479cd41235..9d6dfdc14bbfb06 100644 --- a/llvm/test/TableGen/lit.local.cfg +++ b/llvm/test/TableGen/lit.local.cfg @@ -1,2 +1,10 @@ +import platform +import lit.formats + config.suffixes = [".td"] config.excludes = ["Common", "Inputs"] + +# AIX 'diff' command doesn't support --strip-trailing-cr, but the internal +# python implementation does, so use that for cross platform compatibility +if platform.system() == "AIX": + config.test_format = lit.formats.ShTest() diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py index 1ef5796cd32e448..5f762ec7f3514ab 100644 --- a/llvm/utils/lit/lit/llvm/config.py +++ b/llvm/utils/lit/lit/llvm/config.py @@ -57,13 +57,6 @@ def __init__(self, lit_config, config): self.lit_config.note("using lit tools: {}".format(path)) lit_path_displayed = True - if platform.system() == "AIX": - # Diff on AIX doesn't have all the required features (see - # https://github.com/llvm/llvm-project/pull/108871 and - # https://github.com/llvm/llvm-project/pull/112997#issuecomment-2429656192) - # so always use the internal shell. - self.use_lit_shell = True - if platform.system() == "OS/390": self.with_environment("_BPXK_AUTOCVT", "ON") self.with_environment("_TAG_REDIR_IN", "TXT") From f24c1dd08ea71fa7334a85fd2772c2f728de0c56 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 25 Oct 2024 18:11:20 +0100 Subject: [PATCH 038/425] Fix MSVC "signed/unsigned mismatch" warning. NFC. 
--- clang/tools/clang-format/ClangFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 96fb85e99bf5f0f..5522d05744a2b4c 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -510,7 +510,7 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) { reformat(*FormatStyle, *ChangedCode, Ranges, AssumedFileName, &Status); Replaces = Replaces.merge(FormatChanges); if (DryRun) { - return Replaces.size() > (IsJson ? 1 : 0) && + return Replaces.size() > (IsJson ? 1u : 0u) && emitReplacementWarnings(Replaces, AssumedFileName, Code); } if (OutputXML) { From e6917e95548f81e7f00b8bca70ce571780e2afc9 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Fri, 25 Oct 2024 21:15:21 +0400 Subject: [PATCH 039/425] =?UTF-8?q?[clang][NFC]=20Add=20test=20for=20CWG18?= =?UTF-8?q?98=20"Use=20of=20=E2=80=9Cequivalent=E2=80=9D=20in=20overload?= =?UTF-8?q?=20resolution"=20(#113439)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [CWG1898](https://cplusplus.github.io/CWG/issues/1898.html) Use of “equivalent” in overload resolution ==================== [P1787R6](https://wg21.link/p1787r6): > CWG1898 is resolved by explicitly using the defined term parameter-type-list. Except that now it's called non-object-parameter-type-list, which is defined in [dcl.fct] [p8](https://eel.is/c++draft/dcl.fct#8) and [p4](https://eel.is/c++draft/dcl.fct#4). As for the wording, the first sentence [\_N4140\_.[over.dcl]/1](https://timsong-cpp.github.io/cppwp/n4140/over.dcl#1) where the word "equivalent" was used: > Two function declarations of the same name refer to the same function if they are in the same scope and have equivalent parameter declarations ([over.load]).
was replaced with what is now known as "corresponding overloads", defined in [[basic.scope.scope]/4](https://eel.is/c++draft/basic.scope#scope-4). The definition is present in P1787R6, but it's hard to reference, because the "corresponding overloads" term was coined later. --- clang/test/CXX/drs/cwg18xx.cpp | 83 ++++++++++++++++++++++++++++++++++ clang/www/cxx_dr_status.html | 2 +- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/clang/test/CXX/drs/cwg18xx.cpp b/clang/test/CXX/drs/cwg18xx.cpp index 7f0fb8cf589d48c..b059492637bd5cf 100644 --- a/clang/test/CXX/drs/cwg18xx.cpp +++ b/clang/test/CXX/drs/cwg18xx.cpp @@ -640,3 +640,86 @@ namespace H { struct S s; } } + +namespace cwg1898 { // cwg1898: 2.7 +void e(int) {} // #cwg1898-e +void e(int) {} +// expected-error@-1 {{redefinition of 'e'}} +// expected-note@#cwg1898-e {{previous definition is here}} + +void e2(int) {} +void e2(long) {} // OK, different type + +void f(int) {} // #cwg1898-f +void f(const int) {} +// expected-error@-1 {{redefinition of 'f'}} +// expected-note@#cwg1898-f {{previous definition is here}} + +void g(int) {} // #cwg1898-g +void g(volatile int) {} +// since-cxx20-warning@-1 {{volatile-qualified parameter type 'volatile int' is deprecated}} +// expected-error@-2 {{redefinition of 'g'}} +// expected-note@#cwg1898-g {{previous definition is here}} + +void h(int *) {} // #cwg1898-h +void h(int[]) {} +// expected-error@-1 {{redefinition of 'h'}} +// expected-note@#cwg1898-h {{previous definition is here}} + +void h2(int *) {} // #cwg1898-h2 +void h2(int[2]) {} +// expected-error@-1 {{redefinition of 'h2'}} +// expected-note@#cwg1898-h2 {{previous definition is here}} + +void h3(int (*)[2]) {} // #cwg1898-h3 +void h3(int [3][2]) {} +// expected-error@-1 {{redefinition of 'h3'}} +// expected-note@#cwg1898-h3 {{previous definition is here}} + +void h4(int (*)[2]) {} +void h4(int [3][3]) {} // OK, differ in non-top-level extent of array + +void i(int *) {} +void i(const int *) {} // OK, 
pointee cv-qualification is not discarded + +void i2(int *) {} // #cwg1898-i2 +void i2(int * const) {} +// expected-error@-1 {{redefinition of 'i2'}} +// expected-note@#cwg1898-i2 {{previous definition is here}} + +void j(void(*)()) {} // #cwg1898-j +void j(void()) {} +// expected-error@-1 {{redefinition of 'j'}} +// expected-note@#cwg1898-j {{previous definition is here}} + +void j2(void(int)) {} // #cwg1898-j2 +void j2(void(const int)) {} +// expected-error@-1 {{redefinition of 'j2'}} +// expected-note@#cwg1898-j2 {{previous definition is here}} + +struct A { + void k(int) {} // #cwg1898-k + void k(int) {} + // expected-error@-1 {{class member cannot be redeclared}} + // expected-note@#cwg1898-k {{previous definition is here}} +}; + +struct B : A { + void k(int) {} // OK, shadows A::k +}; + +void l() {} +void l(...) {} + +#if __cplusplus >= 201103L +template +void m(T) {} +template +void m(Ts...) {} + +template +void m2(T, U) {} +template +void m2(Ts..., U) {} +#endif +} // namespace cwg1898 diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 82ba9b370ba5953..6640ed477a241e5 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -11219,7 +11219,7 @@

C++ defect report implementation status

1898 CD6 Use of “equivalent” in overload resolution - Unknown + Clang 2.7 1899 From d3c29e8d2f11742e83e2b80df47391598bf2e857 Mon Sep 17 00:00:00 2001 From: Yijia Gu Date: Fri, 25 Oct 2024 10:24:31 -0700 Subject: [PATCH 040/425] [mlir][test][bazel] add missing deps for TestPass --- utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 34beb758a12dd44..c69f793943beeca 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -547,8 +547,13 @@ cc_library( ":TestDialect", "//llvm:Support", "//mlir:FuncDialect", + "//mlir:GPUToSPIRV", + "//mlir:GPUTransforms", "//mlir:IR", "//mlir:Pass", + "//mlir:SPIRVDialect", + "//mlir:SPIRVToLLVM", + "//mlir:SPIRVTransforms", "//mlir:Support", ], ) From 6e7375031a1a3172d5e369cf2c108da2bcf65c8a Mon Sep 17 00:00:00 2001 From: Arvind Sudarsanam Date: Fri, 25 Oct 2024 10:27:42 -0700 Subject: [PATCH 041/425] [clang-linker-wrapper] Add error handling for missing linker path (#113613) In clang-linker-wrapper, we do not explicitly check if --linker-path is provided. This PR adds a check to capture this. 
Thanks --------- Signed-off-by: Arvind Sudarsanam --- clang/test/Driver/linker-wrapper.c | 4 ++++ clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c index 068ea2d7d3c663c..470af4d5d70cac7 100644 --- a/clang/test/Driver/linker-wrapper.c +++ b/clang/test/Driver/linker-wrapper.c @@ -250,3 +250,7 @@ __attribute__((visibility("protected"), used)) int x; // MLLVM-SAME: -Xlinker -mllvm=-pass-remarks=foo,bar // OFFLOAD-OPT-NOT: -Xlinker -mllvm=-pass-remarks=foo,bar // OFFLOAD-OPT-SAME: {{$}} + +// Error handling when --linker-path is not provided for clang-linker-wrapper +// RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s +// LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path' diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 9fea1fdcd5fb466..9fcecaee318a79f 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -370,6 +370,8 @@ Error runLinker(ArrayRef Files, const ArgList &Args) { // Render the linker arguments and add the newly created image. We add it // after the output file to ensure it is linked with the correct libraries. StringRef LinkerPath = Args.getLastArgValue(OPT_linker_path_EQ); + if (LinkerPath.empty()) + return createStringError("linker path missing, must pass 'linker-path'"); ArgStringList NewLinkerArgs; for (const opt::Arg *Arg : Args) { // Do not forward arguments only intended for the linker wrapper. 
From ac4bd74190fedfbe025ef757ff308dd184a507f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Fri, 25 Oct 2024 10:39:26 -0700 Subject: [PATCH 042/425] [mlir] Add apply_patterns.linalg.pad_vectorization TD Op (#112504) This PR simply wraps `populatePadOpVectorizationPatterns` into a new Transform Dialect Op: `apply_patterns.linalg.pad_vectorization`. This change makes it possible to run (and test) the corresponding patterns _without_: `transform.structured.vectorize_children_and_apply_patterns`. Note that the Op above only supports non-masked vectorisation (i.e. when the inputs are static), so, effectively, only fixed-width vectorisation (as opposed to scalable vectorisation). As such, this change is required to construct vectorization pipelines for tensor.pad targeting scalable vectors. To test the new Op and the corresponding patterns, I added "vectorization-pad-patterns.mlir" - most tests have been extracted from "vectorization-with-patterns.mlir". --- .../Linalg/TransformOps/LinalgTransformOps.td | 20 ++ .../TransformOps/LinalgTransformOps.cpp | 5 + .../Linalg/Transforms/Vectorization.cpp | 3 + .../Linalg/vectorization-pad-patterns.mlir | 274 ++++++++++++++++++ .../Linalg/vectorization-with-patterns.mlir | 143 --------- 5 files changed, 302 insertions(+), 143 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 040c04b0410ecf5..abf446887c54425 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -84,6 +84,26 @@ def ApplyFoldAddIntoDestPatternsOp : Op]> { + let description = [{ + Apply patterns that vectorize tensor.pad. + + These patterns rewrite tensor.pad Ops using vector.transfer_read and + vector.transfer_write operations. 
This is done either by: + 1. Folding tensor.pad with an existing vector.transfer_read / + vector.transfer_write Op (generated prior to running these patterns). + 2. Rewriting it (when matched together with a tensor.insert_slice + consumer Op) as a vector.transfer_read + vector.transfer_write pair. + + In both cases, these patterns look at producers and consumers for the + matched tensor.pad Op to find opportunities for vectorization. + }]; + + let assemblyFormat = "attr-dict"; +} + //===----------------------------------------------------------------------===// // BufferizeToAllocationOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 1f1d8ad89ae2b9b..3d3f0a93a3829bf 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -253,6 +253,11 @@ void transform::ApplyFoldAddIntoDestPatternsOp::populatePatterns( linalg::populateFoldAddIntoDestPatterns(patterns); } +void transform::ApplyPadVectorizationPatternsOp::populatePatterns( + RewritePatternSet &patterns) { + linalg::populatePadOpVectorizationPatterns(patterns); +} + //===----------------------------------------------------------------------===// // BufferizeToAllocationOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index e1b97fbf985df81..0a2457176a1d474 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -2712,6 +2712,9 @@ struct PadOpVectorizationWithInsertSlicePattern void mlir::linalg::populatePadOpVectorizationPatterns( RewritePatternSet &patterns, PatternBenefit baseBenefit) { + // TODO: The following pattern implements
"decomposition" and + // optional "vectorization". Seperate "decomposition" into a sepereate + // pre-processing pattern group. patterns.add(patterns.getContext(), baseBenefit); // Try these specialized patterns first before resorting to the generic one. diff --git a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir new file mode 100644 index 000000000000000..2aa4638af3f0f3b --- /dev/null +++ b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir @@ -0,0 +1,274 @@ +// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s + +///---------------------------------------------------------------------------------------- +/// [Pattern: PadOpVectorizationWithTransferReadPattern] +///---------------------------------------------------------------------------------------- +// CHECK-LABEL: func @pad_and_transfer_read +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 +// CHECK: %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> +// CHECK: return %[[RESULT]] +func.func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %c6 = arith.constant 6.0 : f32 + %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { + ^bb0(%arg1: index, %arg2: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<10x13xf32> + %1 = vector.transfer_read %0[%c0, %c0], %c6 + : tensor<10x13xf32>, vector<7x9xf32> + return %1 : vector<7x9xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + 
transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + +// ----- + +///---------------------------------------------------------------------------------------- +/// [Pattern: PadOpVectorizationWithTransferWritePattern] +///---------------------------------------------------------------------------------------- +func.func private @make_vector() -> vector<7x9xf32> + +// CHECK-LABEL: func @pad_and_transfer_write_static_low_and_high +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK-NOT: tensor.pad +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> +// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32> +// CHECK: return %[[RESULT]] +func.func @pad_and_transfer_write_static_low_and_high( + %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<10x13xf32> + %1 = call @make_vector() : () -> vector<7x9xf32> + %2 = vector.transfer_write %1, %0[%c0, %c0] + : vector<7x9xf32>, tensor<10x13xf32> + %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> + return %3 : tensor<5x6xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + +// ----- + +func.func private @make_vector() -> vector<7x9xf32> + +// CHECK-LABEL: func @pad_and_transfer_write_static_low_dynamic_high +// CHECK-SAME: 
%[[ARG0:.*]]: tensor, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index +// CHECK-NOT: tensor.pad +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor to tensor +// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> +// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor +// CHECK: return %[[RESULT]] +func.func @pad_and_transfer_write_static_low_dynamic_high( + %arg0: tensor, %size: index, %padding: index) -> tensor { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1] + : tensor to tensor + %0 = tensor.pad %s low[0, 0] high[%padding, 7] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor to tensor + %1 = call @make_vector() : () -> vector<7x9xf32> + %2 = vector.transfer_write %1, %0[%c0, %c0] + : vector<7x9xf32>, tensor + %3 = tensor.extract_slice %2[0, 0] [%size, 6] [1, 1] : tensor to tensor + return %3 : tensor +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + + +// ----- + +///---------------------------------------------------------------------------------------- +/// [Pattern: PadOpVectorizationWithInsertSlicePattern] +///---------------------------------------------------------------------------------------- + +func.func private @make_vector() -> tensor<12x13xf32> + +// CHECK-LABEL: func @pad_and_insert_slice_source +// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index 
+// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 +// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[VEC0]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32> +// CHECK: return %[[WRITE]] +func.func @pad_and_insert_slice_source( + %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 0] high[2, 3] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<7x9xf32> + %1 = call @make_vector() : () -> tensor<12x13xf32> + %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32> + return %r : tensor<12x13xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + + +// ----- + +///---------------------------------------------------------------------------------------- +/// tensor::PadOp -> tensor::EmptyOp + linalg::FillOp/tensor::GenerateOp + tensor::InsertSliceOp +/// [Pattern: GenericPadOpVectorizationPattern] +///---------------------------------------------------------------------------------------- + +func.func private @make_vector() -> tensor<12x13xf32> + +// Same as @pad_and_insert_slice_dest in vectorization-with-patterns.mlir, but +// over here linalg::fill is not vectorized (patterns for linalg.fill are not +// included here) +// CHECK-LABEL: func.func @pad_and_insert_slice_dest( +// CHECK-SAME: 
%[[ARG_0:.*]]: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { +// CHECK-NOT: tensor.pad +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[PAD:.*]] = arith.constant 5.000000e+00 : f32 +// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[PAD]] : f32) outs(%[[EMPTY]] : tensor<1x12x13xf32>) -> tensor<1x12x13xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32> +// CHECK: %[[VEC:.*]] = call @make_vector() : () -> tensor<12x13xf32> +// CHECK: %[[RES:.*]] = tensor.insert_slice %[[VEC]] into %[[WRITE]][0, 0, 0] [1, 12, 13] [1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32> +// CHECK: return %[[RES]] : tensor<1x12x13xf32> + +func.func @pad_and_insert_slice_dest( + %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] { + ^bb0(%arg2: index, %arg3: index, %arg4: index): + tensor.yield %c5 : f32 + } : tensor<1x5x6xf32> to tensor<1x12x13xf32> + %1 = call @make_vector() : () -> tensor<12x13xf32> + %r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32> + return %r : tensor<1x12x13xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + +// ----- +func.func private @make_vector() -> vector<7x9xf32> + +// 
Variant of @pad_and_transfer_write_static + +// CHECK-LABEL: func @pad_and_transfer_write_static_non_zero_low_pad +// CHECK-NOT: tensor.pad +// CHECK: linalg.fill +func.func @pad_and_transfer_write_static_non_zero_low_pad( + %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 1] high[5, 6] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<10x13xf32> + %1 = call @make_vector() : () -> vector<7x9xf32> + %2 = vector.transfer_write %1, %0[%c0, %c0] + : vector<7x9xf32>, tensor<10x13xf32> + %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> + return %3 : tensor<5x6xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} + +// ----- +func.func private @make_vector() -> vector<7x9xf32> + +// Variant of @pad_and_transfer_write_static + +// CHECK-LABEL: func @pad_and_transfer_write_static_non_zero_offset +// CHECK-NOT: tensor.pad +// CHECK: linalg.fill +func.func @pad_and_transfer_write_static_non_zero_offset( + %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { + %c0 = arith.constant 0 : index + %c5 = arith.constant 5.0 : f32 + %0 = tensor.pad %arg0 low[0, 1] high[5, 6] { + ^bb0(%arg2: index, %arg3: index): + tensor.yield %c5 : f32 + } : tensor<5x6xf32> to tensor<10x13xf32> + %1 = call @make_vector() : () -> vector<7x9xf32> + %2 = vector.transfer_write %1, %0[%c0, %c0] + : vector<7x9xf32>, tensor<10x13xf32> + %3 = tensor.extract_slice %2[0, 1] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> + return %3 : tensor<5x6xf32> +} + +module 
attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func"> + + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.pad_vectorization + } : !transform.op<"func.func"> + transform.yield + } +} diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir index 1c6a786bfa436d9..189507d97d6dc2f 100644 --- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir @@ -935,149 +935,6 @@ module attributes {transform.with_named_sequence} { } } -// ----- - -// CHECK-LABEL: func @pad_and_transfer_read -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: tensor.pad -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 -// CHECK: %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> -// CHECK: return %[[RESULT]] -func.func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %c6 = arith.constant 6.0 : f32 - %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { - ^bb0(%arg1: index, %arg2: index): - tensor.yield %c5 : f32 - } : tensor<5x6xf32> to tensor<10x13xf32> - %1 = vector.transfer_read %0[%c0, %c0], %c6 - : tensor<10x13xf32>, vector<7x9xf32> - return %1 : vector<7x9xf32> -} - - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %2 = 
transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - -// ----- - -func.func private @make_vector() -> vector<7x9xf32> - -// CHECK-LABEL: func @pad_and_transfer_write_static -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: tensor.pad -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> -// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32> -// CHECK: return %[[RESULT]] -func.func @pad_and_transfer_write_static( - %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %0 = tensor.pad %arg0 low[0, 0] high[5, 7] { - ^bb0(%arg2: index, %arg3: index): - tensor.yield %c5 : f32 - } : tensor<5x6xf32> to tensor<10x13xf32> - %1 = call @make_vector() : () -> vector<7x9xf32> - %2 = vector.transfer_write %1, %0[%c0, %c0] - : vector<7x9xf32>, tensor<10x13xf32> - %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32> - return %3 : tensor<5x6xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - - -// ----- - -func.func private @make_vector() -> vector<7x9xf32> - -// CHECK-LABEL: func @pad_and_transfer_write_dynamic_static -// CHECK-SAME: %[[ARG0:.*]]: tensor, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index -// CHECK-NOT: tensor.pad -// CHECK: %[[C0:.*]] = arith.constant 0 : index -// CHECK: 
%[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor to tensor -// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32> -// CHECK: %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor -// CHECK: return %[[RESULT]] -func.func @pad_and_transfer_write_dynamic_static( - %arg0: tensor, %size: index, %padding: index) -> tensor { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1] - : tensor to tensor - %0 = tensor.pad %s low[0, 0] high[%padding, 7] { - ^bb0(%arg2: index, %arg3: index): - tensor.yield %c5 : f32 - } : tensor to tensor - %1 = call @make_vector() : () -> vector<7x9xf32> - %2 = vector.transfer_write %1, %0[%c0, %c0] - : vector<7x9xf32>, tensor - %3 = tensor.extract_slice %2[0, 0] [%size, 6] [1, 1] : tensor to tensor - return %3 : tensor -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - - -// ----- - -func.func private @make_vector() -> tensor<12x13xf32> - -// CHECK-LABEL: func @pad_and_insert_slice_source -// CHECK-SAME: %[[ARG0:.*]]: tensor<5x6xf32> -// CHECK-NOT: tensor.pad -// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[C5:.*]] = arith.constant 5.0 -// CHECK: %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[VEC0]][%[[C0]], 
%[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32> -// CHECK: return %[[WRITE]] -func.func @pad_and_insert_slice_source( - %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> { - %c0 = arith.constant 0 : index - %c5 = arith.constant 5.0 : f32 - %0 = tensor.pad %arg0 low[0, 0] high[2, 3] { - ^bb0(%arg2: index, %arg3: index): - tensor.yield %c5 : f32 - } : tensor<5x6xf32> to tensor<7x9xf32> - %1 = call @make_vector() : () -> tensor<12x13xf32> - %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32> - return %r : tensor<12x13xf32> -} - -module attributes {transform.with_named_sequence} { - transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { - %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op - %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op - %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op - transform.yield - } -} - - // ----- func.func private @make_vector() -> tensor<12x13xf32> From 14db06946839729befd6bd3ced8142547f5fd139 Mon Sep 17 00:00:00 2001 From: ssijaric-nv Date: Fri, 25 Oct 2024 10:47:39 -0700 Subject: [PATCH 043/425] [InstCombine] Fix a cycle when folding fneg(select) with scalable vector types (#112465) The two folding operations are causing a cycle for the following case with scalable vector types: define @test_fneg_select_abs( %cond, %b) { %1 = select %cond, zeroinitializer, %b %2 = fneg fast %1 ret %2 } 1) fold fneg: -(Cond ? C : Y) -> Cond ? -C : -Y 2) fold select: (Cond ? -X : -Y) -> -(Cond ? X : Y) 1) results in the following since ' zeroinitializer' passes the check for the immediate constant: %.neg = fneg fast zeroinitializer %b.neg = fneg fast %b %1 = select fast %cond, %.neg, %b.neg and so we end up going back and forth between 1) and 2). 
Attempt to fold scalable vector constants, so that we end up with a splat instead: define @test_fneg_select_abs( %cond, %b) { %b.neg = fneg fast %b %1 = select fast %cond, shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer), %b.neg ret %1 } --- llvm/lib/IR/ConstantFold.cpp | 29 ++++++++++--------- llvm/test/Transforms/InstCombine/fneg.ll | 32 +++++++++++++++++++++ llvm/test/Transforms/InstSimplify/fp-nan.ll | 6 ++-- 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index 57d9a03c9c22b83..07dfbc41e79b005 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -581,26 +581,27 @@ Constant *llvm::ConstantFoldUnaryInstruction(unsigned Opcode, Constant *C) { case Instruction::FNeg: return ConstantFP::get(C->getContext(), neg(CV)); } - } else if (auto *VTy = dyn_cast(C->getType())) { - - Type *Ty = IntegerType::get(VTy->getContext(), 32); + } else if (auto *VTy = dyn_cast(C->getType())) { // Fast path for splatted constants. if (Constant *Splat = C->getSplatValue()) if (Constant *Elt = ConstantFoldUnaryInstruction(Opcode, Splat)) return ConstantVector::getSplat(VTy->getElementCount(), Elt); - // Fold each element and create a vector constant from those constants. - SmallVector Result; - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *ExtractIdx = ConstantInt::get(Ty, i); - Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx); - Constant *Res = ConstantFoldUnaryInstruction(Opcode, Elt); - if (!Res) - return nullptr; - Result.push_back(Res); - } + if (auto *FVTy = dyn_cast(VTy)) { + // Fold each element and create a vector constant from those constants. 
+ Type *Ty = IntegerType::get(FVTy->getContext(), 32); + SmallVector Result; + for (unsigned i = 0, e = FVTy->getNumElements(); i != e; ++i) { + Constant *ExtractIdx = ConstantInt::get(Ty, i); + Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx); + Constant *Res = ConstantFoldUnaryInstruction(Opcode, Elt); + if (!Res) + return nullptr; + Result.push_back(Res); + } - return ConstantVector::get(Result); + return ConstantVector::get(Result); + } } // We don't know how to fold this. diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll index 3c4088832feaaa6..6a9b3309bb347ec 100644 --- a/llvm/test/Transforms/InstCombine/fneg.ll +++ b/llvm/test/Transforms/InstCombine/fneg.ll @@ -1109,4 +1109,36 @@ define float @test_fneg_select_maxnum(float %x) { ret float %neg } +; Check that there's no infinite loop. +define @test_fneg_select_svec( %cond, %b) { +; CHECK-LABEL: @test_fneg_select_svec( +; CHECK-NEXT: [[TMP2:%.*]] = fneg fast [[TMP1:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = select fast [[COND:%.*]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer), [[TMP2]] +; CHECK-NEXT: ret [[TMP3]] +; + %1 = select %cond, zeroinitializer, %b + %2 = fneg fast %1 + ret %2 +} + +define @test_fneg_select_svec_2( %cond, %a) { +; CHECK-LABEL: @test_fneg_select_svec_2( +; CHECK-NEXT: [[A_NEG:%.*]] = fneg fast [[A:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select fast [[COND:%.*]], [[A_NEG]], shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: ret [[TMP1]] +; + %1 = select %cond, %a, zeroinitializer + %2 = fneg fast %1 + ret %2 +} + +define @test_fneg_select_svec_3( %cond, %b) { +; CHECK-LABEL: @test_fneg_select_svec_3( +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, double -0.000000e+00, i64 0), poison, zeroinitializer) +; + %1 = select %cond, zeroinitializer, zeroinitializer + %2 = fneg fast %1 + ret %2 +} + !0 = !{} diff --git 
a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll index bb557500822c143..06b23200bafff81 100644 --- a/llvm/test/Transforms/InstSimplify/fp-nan.ll +++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll @@ -237,8 +237,7 @@ define <2 x double> @unary_fneg_nan_2(<2 x double> %x) { ; FIXME: This doesn't behave the same way as the fixed-length vectors above define @unary_fneg_nan_2_scalable_vec_0() { ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_0( -; CHECK-NEXT: [[R:%.*]] = fneg shufflevector ( insertelement ( poison, double 0xFFF1234567890ABC, i64 0), poison, zeroinitializer) -; CHECK-NEXT: ret [[R]] +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, double 0x7FF1234567890ABC, i64 0), poison, zeroinitializer) ; %r = fneg splat (double 0xFFF1234567890ABC) ret %r @@ -247,8 +246,7 @@ define @unary_fneg_nan_2_scalable_vec_0() { ; FIXME: This doesn't behave the same way as the fixed-length vectors above define @unary_fneg_nan_2_scalable_vec_1() { ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_1( -; CHECK-NEXT: [[R:%.*]] = fneg shufflevector ( insertelement ( poison, double 0x7FF0000000000001, i64 0), poison, zeroinitializer) -; CHECK-NEXT: ret [[R]] +; CHECK-NEXT: ret shufflevector ( insertelement ( poison, double 0xFFF0000000000001, i64 0), poison, zeroinitializer) ; %r = fneg splat (double 0x7FF0000000000001) ret %r From 843c2fbe7f983c2a2059f753e4494f06fb645a9e Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Fri, 25 Oct 2024 18:57:01 +0100 Subject: [PATCH 044/425] Add parser+semantics support for scope construct (#113700) Test parsing, semantics and a couple of basic semantic checks for block/worksharing constructs. Add TODO message in lowering. 
--- .../flang/Semantics/openmp-directive-sets.h | 2 ++ flang/lib/Lower/OpenMP/OpenMP.cpp | 12 ++++++++++ flang/lib/Parser/openmp-parsers.cpp | 1 + flang/lib/Parser/unparse.cpp | 3 +++ flang/lib/Semantics/check-omp-structure.cpp | 7 +++++- flang/lib/Semantics/resolve-directives.cpp | 2 ++ flang/test/Lower/OpenMP/Todo/scope.f90 | 13 ++++++++++ flang/test/Parser/OpenMP/scope.f90 | 24 +++++++++++++++++++ .../test/Semantics/OpenMP/invalid-branch.f90 | 8 +++++++ flang/test/Semantics/OpenMP/nested01.f90 | 7 ++++++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 10 +++++++- 11 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 flang/test/Lower/OpenMP/Todo/scope.f90 create mode 100644 flang/test/Parser/OpenMP/scope.f90 diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h index 8eb736bb098fe4e..50d6d5b59ef7dd9 100644 --- a/flang/include/flang/Semantics/openmp-directive-sets.h +++ b/flang/include/flang/Semantics/openmp-directive-sets.h @@ -211,6 +211,7 @@ static const OmpDirectiveSet blockConstructSet{ Directive::OMPD_parallel, Directive::OMPD_parallel_masked, Directive::OMPD_parallel_workshare, + Directive::OMPD_scope, Directive::OMPD_single, Directive::OMPD_target, Directive::OMPD_target_data, @@ -281,6 +282,7 @@ static const OmpDirectiveSet workShareSet{ Directive::OMPD_workshare, Directive::OMPD_parallel_workshare, Directive::OMPD_parallel_sections, + Directive::OMPD_scope, Directive::OMPD_sections, Directive::OMPD_single, } | allDoSet, diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index fc54da8babe63e9..01a40d6e2204ef2 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1650,6 +1650,15 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, return sectionsOp; } +static void genScopeOp(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + 
lower::pft::Evaluation &eval, mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + TODO(loc, "Scope construct"); +} + static mlir::omp::SingleOp genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, @@ -2478,6 +2487,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_simd: genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item); break; + case llvm::omp::Directive::OMPD_scope: + genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item); + break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item); break; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 59a8757e58e8cc4..e740c421ca80276 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -697,6 +697,7 @@ TYPE_PARSER(construct(first( "PARALLEL MASKED" >> pure(llvm::omp::Directive::OMPD_parallel_masked), "PARALLEL WORKSHARE" >> pure(llvm::omp::Directive::OMPD_parallel_workshare), "PARALLEL" >> pure(llvm::omp::Directive::OMPD_parallel), + "SCOPE" >> pure(llvm::omp::Directive::OMPD_scope), "SINGLE" >> pure(llvm::omp::Directive::OMPD_single), "TARGET DATA" >> pure(llvm::omp::Directive::OMPD_target_data), "TARGET PARALLEL" >> pure(llvm::omp::Directive::OMPD_target_parallel), diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 04df988223e8f8d..19ceb2a3ebc3178 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2386,6 +2386,9 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_parallel: Word("PARALLEL "); break; + case llvm::omp::Directive::OMPD_scope: + Word("SCOPE "); + break; case llvm::omp::Directive::OMPD_single: Word("SINGLE "); break; diff --git a/flang/lib/Semantics/check-omp-structure.cpp 
b/flang/lib/Semantics/check-omp-structure.cpp index 46486907ceb9e1f..1c2cf304d0ee95f 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -972,6 +972,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { HasInvalidWorksharingNesting( beginDir.source, llvm::omp::nestedWorkshareErrSet); break; + case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: // TODO: This check needs to be extended while implementing nesting of // regions checks. @@ -1864,6 +1865,9 @@ void OmpStructureChecker::Enter(const parser::OmpEndBlockDirective &x) { const auto &dir{std::get(x.t)}; ResetPartialContext(dir.source); switch (dir.v) { + case llvm::omp::Directive::OMPD_scope: + PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_end_scope); + break; // 2.7.3 end-single-clause -> copyprivate-clause | // nowait-clause case llvm::omp::Directive::OMPD_single: @@ -1886,7 +1890,8 @@ void OmpStructureChecker::Enter(const parser::OmpEndBlockDirective &x) { // end_workshareare popped as they are pushed while entering the // EndBlockDirective. 
void OmpStructureChecker::Leave(const parser::OmpEndBlockDirective &x) { - if ((GetContext().directive == llvm::omp::Directive::OMPD_end_single) || + if ((GetContext().directive == llvm::omp::Directive::OMPD_end_scope) || + (GetContext().directive == llvm::omp::Directive::OMPD_end_single) || (GetContext().directive == llvm::omp::Directive::OMPD_end_workshare)) { dirContext_.pop_back(); } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 33936ba4c2b34f1..513e42bee976a9a 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1526,6 +1526,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { case llvm::omp::Directive::OMPD_master: case llvm::omp::Directive::OMPD_ordered: case llvm::omp::Directive::OMPD_parallel: + case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: case llvm::omp::Directive::OMPD_target: case llvm::omp::Directive::OMPD_target_data: @@ -1557,6 +1558,7 @@ void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) { case llvm::omp::Directive::OMPD_masked: case llvm::omp::Directive::OMPD_parallel_masked: case llvm::omp::Directive::OMPD_parallel: + case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: case llvm::omp::Directive::OMPD_target: case llvm::omp::Directive::OMPD_task: diff --git a/flang/test/Lower/OpenMP/Todo/scope.f90 b/flang/test/Lower/OpenMP/Todo/scope.f90 new file mode 100644 index 000000000000000..16a067dc8f256be --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/scope.f90 @@ -0,0 +1,13 @@ +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s -fopenmp-version=51 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s -fopenmp-version=51 2>&1 | FileCheck %s + +! 
CHECK: not yet implemented: Scope construct +program omp_scope + integer i + i = 10 + + !$omp scope private(i) + print *, "omp scope", i + !$omp end scope + +end program omp_scope diff --git a/flang/test/Parser/OpenMP/scope.f90 b/flang/test/Parser/OpenMP/scope.f90 new file mode 100644 index 000000000000000..6574136311e7187 --- /dev/null +++ b/flang/test/Parser/OpenMP/scope.f90 @@ -0,0 +1,24 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s + +program omp_scope + integer i + i = 10 + +!CHECK: !$OMP SCOPE PRIVATE(i) +!CHECK: !$OMP END SCOPE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct +!PARSE-TREE: OmpBeginBlockDirective +!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = scope +!PARSE-TREE: OmpClauseList -> OmpClause -> Private -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'i' +!PARSE-TREE: Block +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> PrintStmt +!PARSE-TREE: OmpEndBlockDirective +!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = scope +!PARSE-TREE: OmpClauseList -> OmpClause -> Nowait + + !$omp scope private(i) + print *, "omp scope", i + !$omp end scope nowait +end program omp_scope diff --git a/flang/test/Semantics/OpenMP/invalid-branch.f90 b/flang/test/Semantics/OpenMP/invalid-branch.f90 index ed9e4d268f65a8c..28aab8b122f3f2c 100644 --- a/flang/test/Semantics/OpenMP/invalid-branch.f90 +++ b/flang/test/Semantics/OpenMP/invalid-branch.f90 @@ -105,4 +105,12 @@ program omp_invalid_branch !$omp end parallel 9 print *, "2nd alternate return" + !CHECK: invalid branch into an OpenMP structured block + goto 100 + !$omp scope + 100 continue + !CHECK: invalid branch leaving an OpenMP structured block + goto 200 + !$omp end scope + 200 continue end program diff --git 
a/flang/test/Semantics/OpenMP/nested01.f90 b/flang/test/Semantics/OpenMP/nested01.f90 index 49c964ab86aa6bd..0936e4c1b45a5db 100644 --- a/flang/test/Semantics/OpenMP/nested01.f90 +++ b/flang/test/Semantics/OpenMP/nested01.f90 @@ -25,6 +25,13 @@ !$omp end target enddo + !$omp do + do i = 1, N + !ERROR: A worksharing region may not be closely nested inside a worksharing, explicit task, taskloop, critical, ordered, atomic, or master region + !$omp scope + !$omp end scope + end do + !$omp end do !$omp do do i = 1, N diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 1834ad4d037f3d9..d592f369a17f92c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -892,7 +892,7 @@ def OMP_Scan : Directive<"scan"> { let association = AS_Separating; let category = CA_Subsidiary; } -def OMP_scope : Directive<"scope"> { +def OMP_Scope : Directive<"scope"> { let allowedClauses = [ VersionedClause, VersionedClause, @@ -905,6 +905,14 @@ def OMP_scope : Directive<"scope"> { let association = AS_Block; let category = CA_Executable; } +def OMP_EndScope : Directive<"end scope"> { + let allowedOnceClauses = [ + VersionedClause, + ]; + let leafConstructs = OMP_Scope.leafConstructs; + let association = OMP_Scope.association; + let category = OMP_Scope.category; +} def OMP_Section : Directive<"section"> { let association = AS_Separating; let category = CA_Subsidiary; From 144ddca9ed6a439ad8a421c3ff2ea763532341ba Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Fri, 25 Oct 2024 11:09:57 -0700 Subject: [PATCH 045/425] [MemProf] Avoid duplicate edges between nodes (#113337) The recent change to add support for cloning indirect calls inadvertently caused duplicate edges to be created between the same caller/callee pair. 
This is due to the new moveCalleeEdgeToNewCaller not properly guarding the addition of a new edge (ironically I was testing for that in an assertion, but failed to handle that case specially otherwise). Now simply move the context ids over to any existing edge. This issue in turn led to some assumptions in cloning being violated, resulting in a later crash. Add a test for this case to checkNode. --- .../IPO/MemProfContextDisambiguation.cpp | 21 +++++++++++- llvm/test/ThinLTO/X86/memprof-icp.ll | 34 +++++++++++++++---- 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 4efd683dfca3633..905186edcbecc40 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1352,6 +1352,17 @@ static void checkNode(const ContextNode *Node, } assert(NodeContextIds == CalleeEdgeContextIds); } + // FIXME: Since this checking is only invoked under an option, we should + // change the error checking from using assert to something that will trigger + // an error on a release build. +#ifndef NDEBUG + // Make sure we don't end up with duplicate edges between the same caller and + // callee. + DenseSet *> NodeSet; + for (const auto &E : Node->CalleeEdges) + NodeSet.insert(E->Callee); + assert(NodeSet.size() == Node->CalleeEdges.size()); +#endif } template @@ -3125,7 +3136,15 @@ void CallsiteContextGraph:: // from the same callers as the old node. That should be true in the current // use case, where we will remove None-type edges after copying over all // caller edges from the callee. 
- assert(IsNewNode || NewCaller->findEdgeFromCaller(OldCallerEdge->Caller)); + auto *ExistingCallerEdge = + NewCaller->findEdgeFromCaller(OldCallerEdge->Caller); + assert(IsNewNode || ExistingCallerEdge); + if (ExistingCallerEdge) { + ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(), + EdgeContextIdsToMove.end()); + ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove); + continue; + } auto NewEdge = std::make_shared( NewCaller, OldCallerEdge->Caller, computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove); diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll index f17e19e1f77ef25..99e071898765567 100644 --- a/llvm/test/ThinLTO/X86/memprof-icp.ll +++ b/llvm/test/ThinLTO/X86/memprof-icp.ll @@ -186,9 +186,13 @@ ; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1 ; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold ; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold +; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold +; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold ; REMARKS-MAIN: created clone _ZN1B3barEj.memprof.1 ; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold ; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold +; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold +; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold ; REMARKS-FOO: created clone _Z3fooR2B0j.memprof.1 ;; In each version of foo we should have promoted the indirect call to two conditional ;; direct calls, one to B::bar and one to B0::bar. 
The cloned version of foo should call @@ -208,10 +212,10 @@ ; REMARKS-FOO: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold ; REMARKS-FOO: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold -; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis -; STATS-BE: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend -; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis -; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis +; STATS-BE: 8 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend +; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis +; STATS-BE: 8 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend ; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis ; STATS-BE: 5 memprof-context-disambiguation - Number of function clones created during ThinLTO backend @@ -247,8 +251,8 @@ ; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold" ; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold" -; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend -; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend +; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO 
backend +; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend ; STATS-BE-DISTRIB: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend ;--- foo.ll @@ -298,6 +302,9 @@ declare i32 @_Z3fooR2B0j(ptr, i32) define i32 @_ZN2B03barEj(ptr %this, i32 %s) { entry: %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !33, !callsite !38 + ;; Second allocation in this function, to ensure that indirect edges to the + ;; same callee are partitioned correctly. + %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !45, !callsite !50 store volatile i32 0, ptr %call, align 4 ret i32 0 } @@ -311,6 +318,9 @@ declare void @_ZdlPvm() define i32 @_ZN1B3barEj(ptr %this, i32 %s) { entry: %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !39, !callsite !44 + ;; Second allocation in this function, to ensure that indirect edges to the + ;; same callee are partitioned correctly. + %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !51, !callsite !56 store volatile i32 0, ptr %call, align 4 ret i32 0 } @@ -367,3 +377,15 @@ attributes #0 = { builtin allocsize(0) } !42 = !{!43, !"cold"} !43 = !{i64 4457553070050523782, i64 -2101080423462424381, i64 -6490791336773930154} !44 = !{i64 4457553070050523782} +!45 = !{!46, !48} +!46 = !{!47, !"notcold"} +!47 = !{i64 456, i64 -2101080423462424381, i64 5188446645037944434} +!48 = !{!49, !"cold"} +!49 = !{i64 456, i64 -2101080423462424381, i64 5583420417449503557} +!50 = !{i64 456} +!51 = !{!52, !54} +!52 = !{!53, !"notcold"} +!53 = !{i64 789, i64 -2101080423462424381, i64 132626519179914298} +!54 = !{!55, !"cold"} +!55 = !{i64 789, i64 -2101080423462424381, i64 -6490791336773930154} +!56 = !{i64 789} From f4db221258cb44a8f9804ce852c0403328de39b2 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Fri, 25 Oct 2024 11:12:41 -0700 Subject: [PATCH 046/425] [libc++][test] Use `ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings)` for `-Wno-psabi` (#113608) MSVC doesn't understand `-Wno-psabi`, which was introduced here by @ldionne in #106077. Using `ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings)` (implemented by #75317) avoids passing this to MSVC. --- .../std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp index 5130758d5efd52d..abb12d6a3c24730 100644 --- a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp +++ b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp @@ -18,7 +18,7 @@ // Ignore diagnostic about vector types changing the ABI on some targets, since // that is irrelevant for this test. -// ADDITIONAL_COMPILE_FLAGS: -Wno-psabi +// ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings): -Wno-psabi #include #include From a0c318938a528cfbef509a2516b36dd2411a52b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= <41161573+gbossu@users.noreply.github.com> Date: Fri, 25 Oct 2024 20:19:22 +0200 Subject: [PATCH 047/425] [CodeGen][NFC] Properly split MachineLICM and EarlyMachineLICM (#113573) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both are based on MachineLICMBase, and the functionality there is "switched" based on a PreRegAlloc flag. This commit is simply about trusting the original value of that flag, defined by the `MachineLICM` and `EarlyMachineLICM` classes. The `PreRegAlloc` flag used to be overwritten based on MRI.isSSA(), which is unreliable due to how it is inferred by the MIRParser. I see that we can now define isSSA in MIR (thanks @gargaroff ), meaning the fix isn’t really needed anymore, but redefining that flag still feels wrong. 
Note that I'm looking into upstreaming more changes to MachineLICM, see [the discourse thread](https://discourse.llvm.org/t/extending-post-regalloc-machinelicm/82725). --- llvm/lib/CodeGen/MachineLICM.cpp | 6 ------ llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir | 9 +-------- llvm/test/CodeGen/AMDGPU/licm-regpressure.mir | 4 ++-- llvm/test/CodeGen/AMDGPU/licm-valu.mir | 4 ++-- llvm/test/CodeGen/X86/unfoldMemoryOperand.mir | 2 +- llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir | 4 ++-- 6 files changed, 8 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 793ad75759ccb86..7ea07862b839d02 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -391,12 +391,6 @@ bool MachineLICMImpl::run(MachineFunction &MF) { MRI = &MF.getRegInfo(); SchedModel.init(&ST); - // FIXME: Remove this assignment or convert to an assert? (dead variable PreRegAlloc) - // MachineLICM and PostRAMachineLICM were distinguished by introducing - // EarlyMachineLICM and MachineLICM respectively to avoid "using an unreliable - // MRI::isSSA() check to determine whether register allocation has happened" - // (See 4a7c8e7). - PreRegAlloc = MRI->isSSA(); HasProfileData = MF.getFunction().hasProfileData(); if (PreRegAlloc) diff --git a/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir b/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir index 406025c4fde3022..90ff68d30a3a0e5 100644 --- a/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir +++ b/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir @@ -3,9 +3,6 @@ --- name: test tracksRegLiveness: true -isSSA: false -registers: - - { id: 0, class: gpr64 } stack: - { id: 0, size: 8, type: spill-slot } body: | @@ -30,14 +27,11 @@ body: | bb.2: liveins: $x0 - %0 = COPY $x0 ... 
+ --- name: test2 tracksRegLiveness: true -isSSA: false -registers: - - { id: 0, class: gpr64 } stack: - { id: 0, size: 8, type: spill-slot } body: | @@ -62,5 +56,4 @@ body: | bb.2: liveins: $x0 - %0 = COPY $x0 ... diff --git a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir index e63009fdcb43cf2..dd478f94e1039ec 100644 --- a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir +++ b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass machinelicm -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes machinelicm -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s # MachineLICM shall limit hoisting of V_CVT instructions out of the loop keeping # register pressure within the budget. VGPR budget at occupancy 10 is 24 vgprs. 
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir index b4f5e057f532b51..6a28eee19d503cf 100644 --- a/llvm/test/CodeGen/AMDGPU/licm-valu.mir +++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=machinelicm -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s --- name: hoist_move diff --git a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir index ff3d9ca378dbd52..135b14d6836a090 100644 --- a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir +++ b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-- -passes machinelicm -mcpu=skx -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=x86_64-- -passes early-machinelicm -mcpu=skx -verify-machineinstrs -o - %s | FileCheck %s --- | @x = dso_local global i32 0, align 4 @z = dso_local local_unnamed_addr global [1024 x i32] zeroinitializer, align 16 diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir index d4d59e14724ebe7..b65a0e71af1dd2d 100644 --- a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir +++ b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir @@ -1,6 +1,6 @@ --- | - ; RUN: llc -run-pass=machinelicm -o - %s | 
FileCheck %s - ; RUN: llc -passes=machinelicm -o - %s | FileCheck %s + ; RUN: llc -run-pass=early-machinelicm -o - %s | FileCheck %s + ; RUN: llc -passes=early-machinelicm -o - %s | FileCheck %s ; Line numbers should not be retained when loop invariant instructions are hoisted. ; Doing so causes poor stepping bevavior. ; From eccdb2489483ca58d2cb35bc38967a8e33117575 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 25 Oct 2024 13:19:58 -0500 Subject: [PATCH 048/425] [OpenMP] Create versioned libgomp softlinks (#112973) Add libgomp.1.dylib for MacOS and libgomp.so.1 for Linux Linkers on Mac and Linux pick up versioned libgomp dynamic library files. The existing softlinks (libgomp.dylib for MacOS and libgomp.so for Linux) are insufficient. This helps alleviate the issue of mixing libgomp and libomp at runtime. --- openmp/runtime/src/CMakeLists.txt | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt index 439cc20963a1298..61c0bacc9f20629 100644 --- a/openmp/runtime/src/CMakeLists.txt +++ b/openmp/runtime/src/CMakeLists.txt @@ -253,6 +253,17 @@ if(NOT WIN32) libiomp5${LIBOMP_LIBRARY_SUFFIX} WORKING_DIRECTORY ${LIBOMP_LIBRARY_DIR} ) + if(LIBOMP_ENABLE_SHARED) + if(APPLE) + set(VERSIONED_LIBGOMP_NAME libgomp.1${LIBOMP_LIBRARY_SUFFIX}) + else() + set(VERSIONED_LIBGOMP_NAME libgomp${LIBOMP_LIBRARY_SUFFIX}.1) + endif() + add_custom_command(TARGET omp POST_BUILD + COMMAND ${CMAKE_COMMAND} -E create_symlink ${LIBOMP_LIB_FILE} ${VERSIONED_LIBGOMP_NAME} + WORKING_DIRECTORY ${LIBOMP_LIBRARY_DIR} + ) + endif() endif() # Definitions for testing, for reuse when testing libomptarget-nvptx. 
@@ -439,13 +450,18 @@ else() if(${LIBOMP_INSTALL_ALIASES}) # Create aliases (symlinks) of the library for backwards compatibility + extend_path(outdir "${CMAKE_INSTALL_PREFIX}" "${OPENMP_INSTALL_LIBDIR}") set(LIBOMP_ALIASES "libgomp;libiomp5") foreach(alias IN LISTS LIBOMP_ALIASES) - extend_path(outdir "${CMAKE_INSTALL_PREFIX}" "${OPENMP_INSTALL_LIBDIR}") install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E create_symlink \"${LIBOMP_LIB_FILE}\" \"${alias}${LIBOMP_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"\$ENV{DESTDIR}${outdir}\")") endforeach() + if(LIBOMP_ENABLE_SHARED) + install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E create_symlink \"${LIBOMP_LIB_FILE}\" + \"${VERSIONED_LIBGOMP_NAME}\" WORKING_DIRECTORY + \"\$ENV{DESTDIR}${outdir}\")") + endif() endif() endif() install( From 88cc7ac0cc43a739c25f6988c1bfe3949ca4da62 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 25 Oct 2024 14:21:43 -0400 Subject: [PATCH 049/425] [libc++][NFC] Remove unused functions from posix_l_fallbacks (#113709) --- .../__support/xlocale/__posix_l_fallback.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/libcxx/include/__support/xlocale/__posix_l_fallback.h b/libcxx/include/__support/xlocale/__posix_l_fallback.h index 8a3a6f27f48dde9..c83589181747094 100644 --- a/libcxx/include/__support/xlocale/__posix_l_fallback.h +++ b/libcxx/include/__support/xlocale/__posix_l_fallback.h @@ -25,24 +25,10 @@ # include #endif -inline _LIBCPP_HIDE_FROM_ABI int isalnum_l(int __c, locale_t) { return ::isalnum(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int isalpha_l(int __c, locale_t) { return ::isalpha(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int iscntrl_l(int __c, locale_t) { return ::iscntrl(__c); } - inline _LIBCPP_HIDE_FROM_ABI int isdigit_l(int __c, locale_t) { return ::isdigit(__c); } -inline _LIBCPP_HIDE_FROM_ABI int isgraph_l(int __c, locale_t) { return ::isgraph(__c); } - inline _LIBCPP_HIDE_FROM_ABI int islower_l(int __c, locale_t) { return 
::islower(__c); } -inline _LIBCPP_HIDE_FROM_ABI int isprint_l(int __c, locale_t) { return ::isprint(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int ispunct_l(int __c, locale_t) { return ::ispunct(__c); } - -inline _LIBCPP_HIDE_FROM_ABI int isspace_l(int __c, locale_t) { return ::isspace(__c); } - inline _LIBCPP_HIDE_FROM_ABI int isupper_l(int __c, locale_t) { return ::isupper(__c); } inline _LIBCPP_HIDE_FROM_ABI int isxdigit_l(int __c, locale_t) { return ::isxdigit(__c); } @@ -52,8 +38,6 @@ inline _LIBCPP_HIDE_FROM_ABI int toupper_l(int __c, locale_t) { return ::toupper inline _LIBCPP_HIDE_FROM_ABI int tolower_l(int __c, locale_t) { return ::tolower(__c); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -inline _LIBCPP_HIDE_FROM_ABI int iswalnum_l(wint_t __c, locale_t) { return ::iswalnum(__c); } - inline _LIBCPP_HIDE_FROM_ABI int iswalpha_l(wint_t __c, locale_t) { return ::iswalpha(__c); } inline _LIBCPP_HIDE_FROM_ABI int iswblank_l(wint_t __c, locale_t) { return ::iswblank(__c); } @@ -62,8 +46,6 @@ inline _LIBCPP_HIDE_FROM_ABI int iswcntrl_l(wint_t __c, locale_t) { return ::isw inline _LIBCPP_HIDE_FROM_ABI int iswdigit_l(wint_t __c, locale_t) { return ::iswdigit(__c); } -inline _LIBCPP_HIDE_FROM_ABI int iswgraph_l(wint_t __c, locale_t) { return ::iswgraph(__c); } - inline _LIBCPP_HIDE_FROM_ABI int iswlower_l(wint_t __c, locale_t) { return ::iswlower(__c); } inline _LIBCPP_HIDE_FROM_ABI int iswprint_l(wint_t __c, locale_t) { return ::iswprint(__c); } From 4ac0e7e400fe2a66d1fd5d5d1fa1c899dfb16716 Mon Sep 17 00:00:00 2001 From: Gang Chen Date: Fri, 25 Oct 2024 11:24:47 -0700 Subject: [PATCH 050/425] [AMDGPU] Add a type for the named barrier (#113614) --- clang/include/clang/Basic/AMDGPUTypes.def | 8 ++++ clang/lib/CodeGen/CGDebugInfo.cpp | 7 ++++ clang/lib/CodeGen/CodeGenTypes.cpp | 4 ++ clang/test/AST/ast-dump-amdgpu-types.c | 13 ++++-- .../CodeGen/amdgpu-barrier-type-debug-info.c | 8 ++++ .../CodeGenCXX/amdgpu-barrier-typeinfo.cpp | 10 +++++ 
clang/test/CodeGenHIP/amdgpu-barrier-type.hip | 42 +++++++++++++++++++ clang/test/SemaCXX/amdgpu-barrier.cpp | 17 ++++++++ clang/test/SemaHIP/amdgpu-barrier.hip | 20 +++++++++ clang/test/SemaOpenCL/amdgpu-barrier.cl | 12 ++++++ clang/test/SemaOpenMP/amdgpu-barrier.cpp | 17 ++++++++ llvm/lib/IR/Type.cpp | 14 +++++++ 12 files changed, 168 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGen/amdgpu-barrier-type-debug-info.c create mode 100644 clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp create mode 100644 clang/test/CodeGenHIP/amdgpu-barrier-type.hip create mode 100644 clang/test/SemaCXX/amdgpu-barrier.cpp create mode 100644 clang/test/SemaHIP/amdgpu-barrier.hip create mode 100644 clang/test/SemaOpenCL/amdgpu-barrier.cl create mode 100644 clang/test/SemaOpenMP/amdgpu-barrier.cpp diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def index e47e544fdc82c1c..d3dff446f9edf01 100644 --- a/clang/include/clang/Basic/AMDGPUTypes.def +++ b/clang/include/clang/Basic/AMDGPUTypes.def @@ -15,7 +15,15 @@ AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) #endif +#ifndef AMDGPU_NAMED_BARRIER_TYPE +#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \ + AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) +#endif + AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8) +AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0) + #undef AMDGPU_TYPE #undef AMDGPU_OPAQUE_PTR_TYPE +#undef AMDGPU_NAMED_BARRIER_TYPE diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 59a761c2303c951..5fd6cfa63e6efab 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -916,6 +916,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) { TheCU, TheCU->getFile(), 0); \ return SingletonId; \ } +#define 
AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \ + case BuiltinType::Id: { \ + if (!SingletonId) \ + SingletonId = \ + DBuilder.createBasicType(Name, Width, llvm::dwarf::DW_ATE_unsigned); \ + return SingletonId; \ + } #include "clang/Basic/AMDGPUTypes.def" case BuiltinType::UChar: case BuiltinType::Char_U: diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index f87184fc77832ca..09191a4901f4932 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -564,6 +564,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { #define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS) \ case BuiltinType::Id: \ return llvm::PointerType::get(getLLVMContext(), AS); +#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \ + case BuiltinType::Id: \ + return llvm::TargetExtType::get(getLLVMContext(), "amdgcn.named.barrier", \ + {}, {Scope}); #include "clang/Basic/AMDGPUTypes.def" #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) case BuiltinType::Id: #include "clang/Basic/HLSLIntangibleTypes.def" diff --git a/clang/test/AST/ast-dump-amdgpu-types.c b/clang/test/AST/ast-dump-amdgpu-types.c index e032d678f1a09e8..f01461cdba2374e 100644 --- a/clang/test/AST/ast-dump-amdgpu-types.c +++ b/clang/test/AST/ast-dump-amdgpu-types.c @@ -1,10 +1,15 @@ // REQUIRES: amdgpu-registered-target // Test without serialization: -// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_buffer_rsrc_t %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_buffer_rsrc_t %s | FileCheck %s -check-prefix=BUFFER-RSRC +// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_named_workgroup_barrier %s | FileCheck %s -check-prefix=WORKGROUP-BARRIER // // Test with serialization: // RUN: %clang_cc1 -triple amdgcn -emit-pch -o %t %s -// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter 
__amdgpu_buffer_rsrc_t /dev/null | sed -e "s/ //" -e "s/ imported//" | FileCheck %s +// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_buffer_rsrc_t /dev/null | sed -e "s/ //" -e "s/ imported//" | FileCheck %s -check-prefix=BUFFER-RSRC +// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_named_workgroup_barrier /dev/null | sed -e "s/ //" -e "s/ imported//" | FileCheck %s -check-prefix=WORKGROUP-BARRIER -// CHECK: TypedefDecl {{.*}} implicit __amdgpu_buffer_rsrc_t -// CHECK-NEXT: -BuiltinType {{.*}} '__amdgpu_buffer_rsrc_t' +// BUFFER-RSRC: TypedefDecl {{.*}} implicit __amdgpu_buffer_rsrc_t +// BUFFER-RSRC-NEXT: -BuiltinType {{.*}} '__amdgpu_buffer_rsrc_t' + +// WORKGROUP-BARRIER: TypedefDecl {{.*}} implicit __amdgpu_named_workgroup_barrier_t +// WORKGROUP-BARRIER-NEXT: -BuiltinType {{.*}} '__amdgpu_named_workgroup_barrier_t' diff --git a/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c b/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c new file mode 100644 index 000000000000000..f595f1b222c4f65 --- /dev/null +++ b/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c @@ -0,0 +1,8 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s + +// CHECK: name: "__amdgpu_named_workgroup_barrier_t",{{.*}}baseType: ![[BT:[0-9]+]] +// CHECK: [[BT]] = !DIBasicType(name: "__amdgpu_named_workgroup_barrier_t", size: 128, encoding: DW_ATE_unsigned) +void test_locals(void) { + __amdgpu_named_workgroup_barrier_t k0; +} diff --git a/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp new file mode 100644 index 000000000000000..a47f217dcd3db67 --- /dev/null +++ b/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp @@ -0,0 +1,10 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s + +namespace std { class 
type_info; }; + +auto &b0 = typeid(__amdgpu_named_workgroup_barrier_t); + +// CHECK-DAG: @_ZTSu34__amdgpu_named_workgroup_barrier_t = {{.*}} c"u34__amdgpu_named_workgroup_barrier_t\00" +// CHECK-DAG: @_ZTIu34__amdgpu_named_workgroup_barrier_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu34__amdgpu_named_workgroup_barrier_t + diff --git a/clang/test/CodeGenHIP/amdgpu-barrier-type.hip b/clang/test/CodeGenHIP/amdgpu-barrier-type.hip new file mode 100644 index 000000000000000..229e8b3c737c6aa --- /dev/null +++ b/clang/test/CodeGenHIP/amdgpu-barrier-type.hip @@ -0,0 +1,42 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature + // REQUIRES: amdgpu-registered-target + // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s + +#define __shared__ __attribute__((shared)) + +__shared__ __amdgpu_named_workgroup_barrier_t bar; +__shared__ __amdgpu_named_workgroup_barrier_t arr[2]; +__shared__ struct { + __amdgpu_named_workgroup_barrier_t x; + __amdgpu_named_workgroup_barrier_t y; +} str; + +__amdgpu_named_workgroup_barrier_t *getBar(); +void useBar(__amdgpu_named_workgroup_barrier_t *); + +// CHECK-LABEL: define {{[^@]+}}@_Z7testSemPu34__amdgpu_named_workgroup_barrier_t +// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8 +// CHECK-NEXT: call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef [[TMP0]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void 
@_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef addrspacecast (ptr addrspace(1) @bar to ptr)) #[[ATTR2]] +// CHECK-NEXT: call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef getelementptr inbounds ([2 x target("amdgcn.named.barrier", 0)], ptr addrspacecast (ptr addrspace(1) @arr to ptr), i64 0, i64 1)) #[[ATTR2]] +// CHECK-NEXT: call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef getelementptr inbounds nuw ([[STRUCT_ANON:%.*]], ptr addrspacecast (ptr addrspace(1) @str to ptr), i32 0, i32 1)) #[[ATTR2]] +// CHECK-NEXT: [[CALL:%.*]] = call noundef ptr @_Z6getBarv() #[[ATTR2]] +// CHECK-NEXT: call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef [[CALL]]) #[[ATTR2]] +// CHECK-NEXT: [[CALL1:%.*]] = call noundef ptr @_Z6getBarv() #[[ATTR2]] +// CHECK-NEXT: ret ptr [[CALL1]] +// +__amdgpu_named_workgroup_barrier_t *testSem(__amdgpu_named_workgroup_barrier_t *p) { + useBar(p); + useBar(&bar); + useBar(&arr[1]); + useBar(&str.y); + useBar(getBar()); + return getBar(); +} diff --git a/clang/test/SemaCXX/amdgpu-barrier.cpp b/clang/test/SemaCXX/amdgpu-barrier.cpp new file mode 100644 index 000000000000000..a171433727dda41 --- /dev/null +++ b/clang/test/SemaCXX/amdgpu-barrier.cpp @@ -0,0 +1,17 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s + +void foo() { + int n = 100; + __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + 
reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}} + __amdgpu_named_workgroup_barrier_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}} +} + +static_assert(sizeof(__amdgpu_named_workgroup_barrier_t) == 16, "wrong size"); +static_assert(alignof(__amdgpu_named_workgroup_barrier_t) == 4, "wrong alignment"); diff --git a/clang/test/SemaHIP/amdgpu-barrier.hip b/clang/test/SemaHIP/amdgpu-barrier.hip new file mode 100644 index 000000000000000..ccd99b1e2c1f261 --- /dev/null +++ b/clang/test/SemaHIP/amdgpu-barrier.hip @@ -0,0 +1,20 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s +// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s + +#define __device__ __attribute__((device)) + +__device__ void foo() { + int n = 100; + __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} 
+ int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}} + __amdgpu_named_workgroup_barrier_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}} +} + +static_assert(sizeof(__amdgpu_named_workgroup_barrier_t) == 16, "wrong size"); +static_assert(alignof(__amdgpu_named_workgroup_barrier_t) == 4, "wrong alignment"); diff --git a/clang/test/SemaOpenCL/amdgpu-barrier.cl b/clang/test/SemaOpenCL/amdgpu-barrier.cl new file mode 100644 index 000000000000000..150c311c7c59303 --- /dev/null +++ b/clang/test/SemaOpenCL/amdgpu-barrier.cl @@ -0,0 +1,12 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -Wno-unused-value %s +// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -Wno-unused-value %s + +void foo() { + int n = 100; + __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{initializing '__private __amdgpu_named_workgroup_barrier_t' with an expression of incompatible type 'int'}} + int c = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_named_workgroup_barrier_t'}} + __amdgpu_named_workgroup_barrier_t k; + int *ip = (int *)k; // expected-error {{operand of type '__amdgpu_named_workgroup_barrier_t' where arithmetic or pointer type is required}} + void *vp = (void *)k; // expected-error {{operand of type '__amdgpu_named_workgroup_barrier_t' where arithmetic or pointer type is required}} + } diff --git a/clang/test/SemaOpenMP/amdgpu-barrier.cpp b/clang/test/SemaOpenMP/amdgpu-barrier.cpp new file mode 100644 index 000000000000000..70aaefd080885e6 --- /dev/null +++ b/clang/test/SemaOpenMP/amdgpu-barrier.cpp @@ -0,0 +1,17 @@ +// REQUIRES: 
amdgpu-registered-target +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s + +void foo() { +#pragma omp target + { + int n = 100; + __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}} + static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}} + reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}} + int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}} + __amdgpu_named_workgroup_barrier_t k; + int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}} + void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}} + } + } diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 912b1a3960ef196..e311cde415174a9 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -834,6 +834,14 @@ Expected TargetExtType::checkParams(TargetExtType *TTy) { "target extension type riscv.vector.tuple should have one " "type parameter and one integer parameter"); + // Opaque types in the AMDGPU name space. 
+ if (TTy->Name == "amdgcn.named.barrier" && + (TTy->getNumTypeParameters() != 0 || TTy->getNumIntParameters() != 1)) { + return createStringError("target extension type amdgcn.named.barrier " + "should have no type parameters " + "and one integer parameter"); + } + return TTy; } @@ -879,6 +887,12 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) { if (Name.starts_with("dx.")) return TargetTypeInfo(PointerType::get(C, 0)); + // Opaque types in the AMDGPU name space. + if (Name == "amdgcn.named.barrier") { + return TargetTypeInfo(FixedVectorType::get(Type::getInt32Ty(C), 4), + TargetExtType::CanBeGlobal); + } + return TargetTypeInfo(Type::getVoidTy(C)); } From 61946687bc68ccba763571cb420049b9a3749dfe Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 25 Oct 2024 11:33:44 -0700 Subject: [PATCH 051/425] [clang][modules] Shrink the size of `Module::Headers` (#113395) This patch shrinks the size of the `Module` class from 2112B to 1624B. I wasn't able to get a good data on the actual impact on memory usage, but given my `clang-scan-deps` workload at hand (with tens of thousands of instances), I think there should be some win here. This also speeds up my benchmark by under 0.1%. 
--- .../modularize/CoverageChecker.cpp | 7 ++--- .../modularize/ModularizeUtilities.cpp | 14 ++------- clang/include/clang/Basic/Module.h | 31 ++++++++++++++----- clang/lib/Basic/Module.cpp | 2 +- clang/lib/Frontend/FrontendAction.cpp | 2 +- clang/lib/Lex/ModuleMap.cpp | 21 +++++++------ clang/lib/Serialization/ASTWriter.cpp | 4 +-- 7 files changed, 45 insertions(+), 36 deletions(-) diff --git a/clang-tools-extra/modularize/CoverageChecker.cpp b/clang-tools-extra/modularize/CoverageChecker.cpp index 0e76c539aa3c839..b536ee00497c03f 100644 --- a/clang-tools-extra/modularize/CoverageChecker.cpp +++ b/clang-tools-extra/modularize/CoverageChecker.cpp @@ -223,10 +223,9 @@ bool CoverageChecker::collectModuleHeaders(const Module &Mod) { return false; } - for (auto &HeaderKind : Mod.Headers) - for (auto &Header : HeaderKind) - ModuleMapHeadersSet.insert( - ModularizeUtilities::getCanonicalPath(Header.Entry.getName())); + for (const auto &Header : Mod.getAllHeaders()) + ModuleMapHeadersSet.insert( + ModularizeUtilities::getCanonicalPath(Header.Entry.getName())); for (auto *Submodule : Mod.submodules()) collectModuleHeaders(*Submodule); diff --git a/clang-tools-extra/modularize/ModularizeUtilities.cpp b/clang-tools-extra/modularize/ModularizeUtilities.cpp index b202b3aae8f8a3a..476e13770a94f6c 100644 --- a/clang-tools-extra/modularize/ModularizeUtilities.cpp +++ b/clang-tools-extra/modularize/ModularizeUtilities.cpp @@ -358,7 +358,7 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) { } else if (std::optional UmbrellaDir = Mod.getUmbrellaDirAsWritten()) { // If there normal headers, assume these are umbrellas and skip collection. - if (Mod.Headers->size() == 0) { + if (Mod.getHeaders(Module::HK_Normal).empty()) { // Collect headers in umbrella directory. 
if (!collectUmbrellaHeaders(UmbrellaDir->Entry.getName(), UmbrellaDependents)) @@ -371,16 +371,8 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) { // modules or because they are meant to be included by another header, // and thus should be ignored by modularize. - int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size(); - - for (int Index = 0; Index < NormalHeaderCount; ++Index) { - DependentsVector NormalDependents; - // Collect normal header. - const clang::Module::Header &Header( - Mod.Headers[clang::Module::HK_Normal][Index]); - std::string HeaderPath = getCanonicalPath(Header.Entry.getName()); - HeaderFileNames.push_back(HeaderPath); - } + for (const auto &Header : Mod.getHeaders(clang::Module::HK_Normal)) + HeaderFileNames.push_back(getCanonicalPath(Header.Entry.getName())); int MissingCountThisModule = Mod.MissingHeaders.size(); diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 9c5d33fbb562cc9..1ab3b5e5f81567f 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -253,8 +253,6 @@ class alignas(8) Module { HK_PrivateTextual, HK_Excluded }; - static const int NumHeaderKinds = HK_Excluded + 1; - /// Information about a header directive as found in the module map /// file. struct Header { @@ -263,17 +261,36 @@ class alignas(8) Module { FileEntryRef Entry; }; - /// Information about a directory name as found in the module map - /// file. +private: + static const int NumHeaderKinds = HK_Excluded + 1; + // The begin index for a HeaderKind also acts the end index of HeaderKind - 1. + // The extra element at the end acts as the end index of the last HeaderKind. + unsigned HeaderKindBeginIndex[NumHeaderKinds + 1] = {}; + SmallVector HeadersStorage; + +public: + ArrayRef
<Header> getAllHeaders() const { return HeadersStorage; } + ArrayRef<Header>
getHeaders(HeaderKind HK) const { + assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind"); + auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK]; + auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1]; + return {BeginIt, EndIt}; + } + void addHeader(HeaderKind HK, Header H) { + assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind"); + auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1]; + HeadersStorage.insert(EndIt, std::move(H)); + for (unsigned HKI = HK + 1; HKI != NumHeaderKinds + 1; ++HKI) + ++HeaderKindBeginIndex[HKI]; + } + + /// Information about a directory name as found in the module map file. struct DirectoryName { std::string NameAsWritten; std::string PathRelativeToRootModuleDirectory; DirectoryEntryRef Entry; }; - /// The headers that are part of this module. - SmallVector Headers[5]; - /// Stored information about a header directive that was found in the /// module map file but has not been resolved to a file. struct UnresolvedHeaderDirective { diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index ad52fccff5dc7ff..a7a3f6b37efef17 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -528,7 +528,7 @@ void Module::print(raw_ostream &OS, unsigned Indent, bool Dump) const { for (auto &K : Kinds) { assert(&K == &Kinds[K.Kind] && "kinds in wrong order"); - for (auto &H : Headers[K.Kind]) { + for (auto &H : getHeaders(K.Kind)) { OS.indent(Indent + 2); OS << K.Prefix << "header \""; OS.write_escaped(H.NameAsWritten); diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 81eea9c4c4dc58e..8264bd702fe43fb 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -358,7 +358,7 @@ static std::error_code collectModuleHeaderIncludes( // Add includes for each of these headers. 
for (auto HK : {Module::HK_Normal, Module::HK_Private}) { - for (Module::Header &H : Module->Headers[HK]) { + for (const Module::Header &H : Module->getHeaders(HK)) { Module->addTopHeader(H.Entry); // Use the path as specified in the module map file. We'll look for this // file relative to the module build directory (the directory containing diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 0a02a63deba3dc1..bc76a54abd95adf 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -472,12 +472,12 @@ static bool violatesPrivateInclude(Module *RequestingModule, // as obtained from the lookup and as obtained from the module. // This check is not cheap, so enable it only for debugging. bool IsPrivate = false; - SmallVectorImpl *HeaderList[] = { - &Header.getModule()->Headers[Module::HK_Private], - &Header.getModule()->Headers[Module::HK_PrivateTextual]}; - for (auto *Hs : HeaderList) + ArrayRef HeaderList[] = { + Header.getModule()->getHeaders(Module::HK_Private), + Header.getModule()->getHeaders(Module::HK_PrivateTextual)}; + for (auto Hs : HeaderList) IsPrivate |= llvm::any_of( - *Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; }); + Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; }); assert(IsPrivate && "inconsistent headers and roles"); } #endif @@ -1296,27 +1296,28 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header, ModuleHeaderRole Role, bool Imported) { KnownHeader KH(Mod, Role); + FileEntryRef HeaderEntry = Header.Entry; + // Only add each header to the headers list once. // FIXME: Should we diagnose if a header is listed twice in the // same module definition? 
- auto &HeaderList = Headers[Header.Entry]; + auto &HeaderList = Headers[HeaderEntry]; if (llvm::is_contained(HeaderList, KH)) return; HeaderList.push_back(KH); - Mod->Headers[headerRoleToKind(Role)].push_back(Header); + Mod->addHeader(headerRoleToKind(Role), std::move(Header)); bool isCompilingModuleHeader = Mod->isForBuilding(LangOpts); if (!Imported || isCompilingModuleHeader) { // When we import HeaderFileInfo, the external source is expected to // set the isModuleHeader flag itself. - HeaderInfo.MarkFileModuleHeader(Header.Entry, Role, - isCompilingModuleHeader); + HeaderInfo.MarkFileModuleHeader(HeaderEntry, Role, isCompilingModuleHeader); } // Notify callbacks that we just added a new header. for (const auto &Cb : Callbacks) - Cb->moduleMapAddHeader(Header.Entry.getName()); + Cb->moduleMapAddHeader(HeaderEntry.getName()); } FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 494890284d2f2c1..b576822fa704c89 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3070,9 +3070,9 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { Module::HK_PrivateTextual}, {SUBMODULE_EXCLUDED_HEADER, ExcludedHeaderAbbrev, Module::HK_Excluded} }; - for (auto &HL : HeaderLists) { + for (const auto &HL : HeaderLists) { RecordData::value_type Record[] = {HL.RecordKind}; - for (auto &H : Mod->Headers[HL.HeaderKind]) + for (const auto &H : Mod->getHeaders(HL.HeaderKind)) Stream.EmitRecordWithBlob(HL.Abbrev, Record, H.NameAsWritten); } From 9648271a3c5adf875680833ac74eb4bafb48678d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 25 Oct 2024 20:39:45 +0200 Subject: [PATCH 052/425] [LV] Pass flag indicating epilogue is vectorized to executePlan (NFC) This clarifies the flag, which is now only passed if the epilogue loop is being vectorized. 
--- .../Vectorize/LoopVectorizationPlanner.h | 8 ++++---- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 1c8d541ef2c51fd..b2745c81dec8885 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -435,9 +435,9 @@ class LoopVectorizationPlanner { /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan /// according to the best selected \p VF and \p UF. /// - /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue - /// vectorization re-using plans for both the main and epilogue vector loops. - /// It should be removed once the re-use issue has been fixed. + /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the + /// epilogue vector loop. It should be removed once the re-use issue has been + /// fixed. /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop /// to re-use expansion results generated during main plan execution. 
/// @@ -447,7 +447,7 @@ class LoopVectorizationPlanner { DenseMap executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, - bool IsEpilogueVectorization, + bool VectorizingEpilogue, const DenseMap *ExpandedSCEVs = nullptr); #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e1173ddd71af9c5..865f5e3d2e588da 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7626,16 +7626,16 @@ static void createAndCollectMergePhiForReduction( DenseMap LoopVectorizationPlanner::executePlan( ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan, - InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization, + InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue, const DenseMap *ExpandedSCEVs) { assert(BestVPlan.hasVF(BestVF) && "Trying to execute plan with unsupported VF"); assert(BestVPlan.hasUF(BestUF) && "Trying to execute plan with unsupported UF"); assert( - (IsEpilogueVectorization || !ExpandedSCEVs) && + ((VectorizingEpilogue && ExpandedSCEVs) || + (!VectorizingEpilogue && !ExpandedSCEVs)) && "expanded SCEVs to reuse can only be used during epilogue vectorization"); - (void)IsEpilogueVectorization; // TODO: Move to VPlan transform stage once the transition to the VPlan-based // cost model is complete for better cost estimates. @@ -7661,8 +7661,8 @@ DenseMap LoopVectorizationPlanner::executePlan( if (!ILV.getTripCount()) ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0))); else - assert(IsEpilogueVectorization && "should only re-use the existing trip " - "count during epilogue vectorization"); + assert(VectorizingEpilogue && "should only re-use the existing trip " + "count during epilogue vectorization"); // 1. Set up the skeleton for vectorization, including vector pre-header and // middle block. 
The vector loop is created during VPlan execution. @@ -7715,7 +7715,7 @@ DenseMap LoopVectorizationPlanner::executePlan( for (VPRecipeBase &R : *ExitVPBB) { createAndCollectMergePhiForReduction( dyn_cast(&R), State, OrigLoop, - State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs); + State.CFG.VPBB2IRBB[ExitVPBB], VectorizingEpilogue); } // 2.6. Maintain Loop Hints @@ -10233,7 +10233,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { std::unique_ptr BestMainPlan(BestPlan.duplicate()); auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, - *BestMainPlan, MainILV, DT, true); + *BestMainPlan, MainILV, DT, false); ++LoopsVectorized; // Second pass vectorizes the epilogue and adjusts the control flow From 8c4bc1e75de27adfbaead34b895b0efbaf17bd02 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 25 Oct 2024 11:44:20 -0700 Subject: [PATCH 053/425] [mlir][Transforms] Merge 1:1 and 1:N type converters (#113032) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 1:N type converter derived from the 1:1 type converter and extends it with 1:N target materializations. This commit merges the two type converters and stores 1:N target materializations in the 1:1 type converter. This is in preparation of merging the 1:1 and 1:N dialect conversion infrastructures. 1:1 target materializations (producing a single `Value`) will remain valid. An additional API is added to the type converter to register 1:N target materializations (producing a `SmallVector`). Internally, all target materializations are stored as 1:N materializations. The 1:N type converter is removed. Note for LLVM integration: If you are using the `OneToNTypeConverter`, simply switch all occurrences to `TypeConverter`. 
--------- Co-authored-by: Markus Böck --- .../Dialect/SparseTensor/Transforms/Passes.h | 2 +- .../mlir/Transforms/DialectConversion.h | 62 ++++++++++++++----- .../mlir/Transforms/OneToNTypeConversion.h | 45 +------------- .../ArmSME/Transforms/VectorLegalization.cpp | 2 +- .../Transforms/Utils/DialectConversion.cpp | 26 ++++++-- .../Transforms/Utils/OneToNTypeConversion.cpp | 44 +++++-------- .../TestOneToNTypeConversionPass.cpp | 18 ++++-- 7 files changed, 101 insertions(+), 98 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h index 6ccbc40bdd6034a..2e9c297f20182af 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h @@ -150,7 +150,7 @@ std::unique_ptr createLowerForeachToSCFPass(); //===----------------------------------------------------------------------===// /// Type converter for iter_space and iterator. -struct SparseIterationTypeConverter : public OneToNTypeConverter { +struct SparseIterationTypeConverter : public TypeConverter { SparseIterationTypeConverter(); }; diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 5ff36160dd61620..5e5957170e646c3 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -173,7 +173,9 @@ class TypeConverter { /// conversion has finished. /// /// Note: Target materializations may optionally accept an additional Type - /// parameter, which is the original type of the SSA value. + /// parameter, which is the original type of the SSA value. Furthermore, `T` + /// can be a TypeRange; in that case, the function must return a + /// SmallVector. 
/// This method registers a materialization that will be called when /// converting (potentially multiple) block arguments that were the result of @@ -210,6 +212,9 @@ class TypeConverter { /// will be invoked with: outputType = "t3", inputs = "v2", // originalType = "t1". Note that the original type "t1" cannot be recovered /// from just "t3" and "v2"; that's why the originalType parameter exists. + /// + /// Note: During a 1:N conversion, the result types can be a TypeRange. In + /// that case the materialization produces a SmallVector. template >::template arg_t<1>> void addTargetMaterialization(FnT &&callback) { @@ -316,6 +321,11 @@ class TypeConverter { Value materializeTargetConversion(OpBuilder &builder, Location loc, Type resultType, ValueRange inputs, Type originalType = {}) const; + SmallVector materializeTargetConversion(OpBuilder &builder, + Location loc, + TypeRange resultType, + ValueRange inputs, + Type originalType = {}) const; /// Convert an attribute present `attr` from within the type `type` using /// the registered conversion functions. If no applicable conversion has been @@ -340,9 +350,9 @@ class TypeConverter { /// The signature of the callback used to materialize a target conversion. /// - /// Arguments: builder, result type, inputs, location, original type - using TargetMaterializationCallbackFn = - std::function; + /// Arguments: builder, result types, inputs, location, original type + using TargetMaterializationCallbackFn = std::function( + OpBuilder &, TypeRange, ValueRange, Location, Type)>; /// The signature of the callback used to convert a type attribute. using TypeAttributeConversionCallbackFn = @@ -409,22 +419,46 @@ class TypeConverter { /// callback. 
/// /// With callback of form: - /// `Value(OpBuilder &, T, ValueRange, Location, Type)` + /// - Value(OpBuilder &, T, ValueRange, Location, Type) + /// - SmallVector(OpBuilder &, TypeRange, ValueRange, Location, Type) template std::enable_if_t< std::is_invocable_v, TargetMaterializationCallbackFn> wrapTargetMaterialization(FnT &&callback) const { return [callback = std::forward(callback)]( - OpBuilder &builder, Type resultType, ValueRange inputs, - Location loc, Type originalType) -> Value { - if (T derivedType = dyn_cast(resultType)) - return callback(builder, derivedType, inputs, loc, originalType); - return Value(); + OpBuilder &builder, TypeRange resultTypes, ValueRange inputs, + Location loc, Type originalType) -> SmallVector { + SmallVector result; + if constexpr (std::is_same::value) { + // This is a 1:N target materialization. Return the produced values + // directly. + result = callback(builder, resultTypes, inputs, loc, originalType); + } else if constexpr (std::is_assignable::value) { + // This is a 1:1 target materialization. Invoke the callback only if a + // single SSA value is requested. + if (resultTypes.size() == 1) { + // Invoke the callback only if the type class of the callback matches + // the requested result type. + if (T derivedType = dyn_cast(resultTypes.front())) { + // 1:1 materializations produce single values, but we store 1:N + // target materialization functions in the type converter. Wrap the + // result value in a SmallVector. 
+ Value val = + callback(builder, derivedType, inputs, loc, originalType); + if (val) + result.push_back(val); + } + } + } else { + static_assert(sizeof(T) == 0, "T must be a Type or a TypeRange"); + } + return result; }; } /// With callback of form: - /// `Value(OpBuilder &, T, ValueRange, Location)` + /// - Value(OpBuilder &, T, ValueRange, Location) + /// - SmallVector(OpBuilder &, TypeRange, ValueRange, Location) template std::enable_if_t< std::is_invocable_v, @@ -432,9 +466,9 @@ class TypeConverter { wrapTargetMaterialization(FnT &&callback) const { return wrapTargetMaterialization( [callback = std::forward(callback)]( - OpBuilder &builder, T resultType, ValueRange inputs, Location loc, - Type originalType) -> Value { - return callback(builder, resultType, inputs, loc); + OpBuilder &builder, T resultTypes, ValueRange inputs, Location loc, + Type originalType) { + return callback(builder, resultTypes, inputs, loc); }); } diff --git a/mlir/include/mlir/Transforms/OneToNTypeConversion.h b/mlir/include/mlir/Transforms/OneToNTypeConversion.h index c59a3a52f028f32..7b4dd65cbff7b2d 100644 --- a/mlir/include/mlir/Transforms/OneToNTypeConversion.h +++ b/mlir/include/mlir/Transforms/OneToNTypeConversion.h @@ -33,49 +33,6 @@ namespace mlir { -/// Extends `TypeConverter` with 1:N target materializations. Such -/// materializations have to provide the "reverse" of 1:N type conversions, -/// i.e., they need to materialize N values with target types into one value -/// with a source type (which isn't possible in the base class currently). -class OneToNTypeConverter : public TypeConverter { -public: - /// Callback that expresses user-provided materialization logic from the given - /// value to N values of the given types. This is useful for expressing target - /// materializations for 1:N type conversions, which materialize one value in - /// a source type as N values in target types. 
- using OneToNMaterializationCallbackFn = - std::function>(OpBuilder &, TypeRange, - Value, Location)>; - - /// Creates the mapping of the given range of original types to target types - /// of the conversion and stores that mapping in the given (signature) - /// conversion. This function simply calls - /// `TypeConverter::convertSignatureArgs` and exists here with a different - /// name to reflect the broader semantic. - LogicalResult computeTypeMapping(TypeRange types, - SignatureConversion &result) const { - return convertSignatureArgs(types, result); - } - - /// Applies one of the user-provided 1:N target materializations. If several - /// exists, they are tried out in the reverse order in which they have been - /// added until the first one succeeds. If none succeeds, the functions - /// returns `std::nullopt`. - std::optional> - materializeTargetConversion(OpBuilder &builder, Location loc, - TypeRange resultTypes, Value input) const; - - /// Adds a 1:N target materialization to the converter. Such materializations - /// build IR that converts N values with target types into 1 value of the - /// source type. - void addTargetMaterialization(OneToNMaterializationCallbackFn &&callback) { - oneToNTargetMaterializations.emplace_back(std::move(callback)); - } - -private: - SmallVector oneToNTargetMaterializations; -}; - /// Stores a 1:N mapping of types and provides several useful accessors. This /// class extends `SignatureConversion`, which already supports 1:N type /// mappings but lacks some accessors into the mapping as well as access to the @@ -295,7 +252,7 @@ class OneToNOpConversionPattern : public OneToNConversionPattern { /// not fail if some ops or types remain unconverted (i.e., the conversion is /// only "partial"). 
LogicalResult -applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter, +applyPartialOneToNConversion(Operation *op, TypeConverter &typeConverter, const FrozenRewritePatternSet &patterns); /// Add a pattern to the given pattern list to convert the signature of a diff --git a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp index 4968c4fc463d04b..e908a536e6fb271 100644 --- a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp +++ b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp @@ -921,7 +921,7 @@ struct VectorLegalizationPass : public arm_sme::impl::VectorLegalizationBase { void runOnOperation() override { auto *context = &getContext(); - OneToNTypeConverter converter; + TypeConverter converter; RewritePatternSet patterns(context); converter.addConversion([](Type type) { return type; }); converter.addConversion( diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 3cfcaa965f3546a..3d0c81867e0cc26 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -2831,11 +2831,29 @@ Value TypeConverter::materializeTargetConversion(OpBuilder &builder, Location loc, Type resultType, ValueRange inputs, Type originalType) const { + SmallVector result = materializeTargetConversion( + builder, loc, TypeRange(resultType), inputs, originalType); + if (result.empty()) + return nullptr; + assert(result.size() == 1 && "expected single result"); + return result.front(); +} + +SmallVector TypeConverter::materializeTargetConversion( + OpBuilder &builder, Location loc, TypeRange resultTypes, ValueRange inputs, + Type originalType) const { for (const TargetMaterializationCallbackFn &fn : - llvm::reverse(targetMaterializations)) - if (Value result = fn(builder, resultType, inputs, loc, originalType)) - return result; - return nullptr; + 
llvm::reverse(targetMaterializations)) { + SmallVector result = + fn(builder, resultTypes, inputs, loc, originalType); + if (result.empty()) + continue; + assert(TypeRange(result) == resultTypes && + "callback produced incorrect number of values or values with " + "incorrect types"); + return result; + } + return {}; } std::optional diff --git a/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp b/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp index 19e29d48623e04c..c208716891ef1f4 100644 --- a/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp +++ b/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp @@ -17,20 +17,6 @@ using namespace llvm; using namespace mlir; -std::optional> -OneToNTypeConverter::materializeTargetConversion(OpBuilder &builder, - Location loc, - TypeRange resultTypes, - Value input) const { - for (const OneToNMaterializationCallbackFn &fn : - llvm::reverse(oneToNTargetMaterializations)) { - if (std::optional> result = - fn(builder, resultTypes, input, loc)) - return *result; - } - return std::nullopt; -} - TypeRange OneToNTypeMapping::getConvertedTypes(unsigned originalTypeNo) const { TypeRange convertedTypes = getConvertedTypes(); if (auto mapping = getInputMapping(originalTypeNo)) @@ -268,20 +254,20 @@ Block *OneToNPatternRewriter::applySignatureConversion( LogicalResult OneToNConversionPattern::matchAndRewrite(Operation *op, PatternRewriter &rewriter) const { - auto *typeConverter = getTypeConverter(); + auto *typeConverter = getTypeConverter(); // Construct conversion mapping for results. Operation::result_type_range originalResultTypes = op->getResultTypes(); OneToNTypeMapping resultMapping(originalResultTypes); - if (failed(typeConverter->computeTypeMapping(originalResultTypes, - resultMapping))) + if (failed(typeConverter->convertSignatureArgs(originalResultTypes, + resultMapping))) return failure(); // Construct conversion mapping for operands. 
Operation::operand_type_range originalOperandTypes = op->getOperandTypes(); OneToNTypeMapping operandMapping(originalOperandTypes); - if (failed(typeConverter->computeTypeMapping(originalOperandTypes, - operandMapping))) + if (failed(typeConverter->convertSignatureArgs(originalOperandTypes, + operandMapping))) return failure(); // Cast operands to target types. @@ -318,7 +304,7 @@ namespace mlir { // inserted by this pass are annotated with a string attribute that also // documents which kind of the cast (source, argument, or target). LogicalResult -applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter, +applyPartialOneToNConversion(Operation *op, TypeConverter &typeConverter, const FrozenRewritePatternSet &patterns) { #ifndef NDEBUG // Remember existing unrealized casts. This data structure is only used in @@ -370,15 +356,13 @@ applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter, // Target materialization. assert(!areOperandTypesLegal && areResultsTypesLegal && operands.size() == 1 && "found unexpected target cast"); - std::optional> maybeResults = - typeConverter.materializeTargetConversion( - rewriter, castOp->getLoc(), resultTypes, operands.front()); - if (!maybeResults) { + materializedResults = typeConverter.materializeTargetConversion( + rewriter, castOp->getLoc(), resultTypes, operands.front()); + if (materializedResults.empty()) { emitError(castOp->getLoc()) << "failed to create target materialization"; return failure(); } - materializedResults = maybeResults.value(); } else { // Source and argument materializations. 
assert(areOperandTypesLegal && !areResultsTypesLegal && @@ -427,18 +411,18 @@ class FunctionOpInterfaceSignatureConversion : public OneToNConversionPattern { const OneToNTypeMapping &resultMapping, ValueRange convertedOperands) const override { auto funcOp = cast(op); - auto *typeConverter = getTypeConverter(); + auto *typeConverter = getTypeConverter(); // Construct mapping for function arguments. OneToNTypeMapping argumentMapping(funcOp.getArgumentTypes()); - if (failed(typeConverter->computeTypeMapping(funcOp.getArgumentTypes(), - argumentMapping))) + if (failed(typeConverter->convertSignatureArgs(funcOp.getArgumentTypes(), + argumentMapping))) return failure(); // Construct mapping for function results. OneToNTypeMapping funcResultMapping(funcOp.getResultTypes()); - if (failed(typeConverter->computeTypeMapping(funcOp.getResultTypes(), - funcResultMapping))) + if (failed(typeConverter->convertSignatureArgs(funcOp.getResultTypes(), + funcResultMapping))) return failure(); // Nothing to do if the op doesn't have any non-identity conversions for its diff --git a/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp b/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp index 5c03ac12d1e58ce..b18dfd8bb22cb15 100644 --- a/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp +++ b/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp @@ -147,9 +147,14 @@ populateDecomposeTuplesTestPatterns(const TypeConverter &typeConverter, /// /// This function has been copied (with small adaptions) from /// TestDecomposeCallGraphTypes.cpp. 
-static std::optional> -buildGetTupleElementOps(OpBuilder &builder, TypeRange resultTypes, Value input, - Location loc) { +static SmallVector buildGetTupleElementOps(OpBuilder &builder, + TypeRange resultTypes, + ValueRange inputs, + Location loc) { + if (inputs.size() != 1) + return {}; + Value input = inputs.front(); + TupleType inputType = dyn_cast(input.getType()); if (!inputType) return {}; @@ -222,7 +227,7 @@ void TestOneToNTypeConversionPass::runOnOperation() { auto *context = &getContext(); // Assemble type converter. - OneToNTypeConverter typeConverter; + TypeConverter typeConverter; typeConverter.addConversion([](Type type) { return type; }); typeConverter.addConversion( @@ -234,6 +239,11 @@ void TestOneToNTypeConversionPass::runOnOperation() { typeConverter.addArgumentMaterialization(buildMakeTupleOp); typeConverter.addSourceMaterialization(buildMakeTupleOp); typeConverter.addTargetMaterialization(buildGetTupleElementOps); + // Test the other target materialization variant that takes the original type + // as additional argument. This materialization function always fails. + typeConverter.addTargetMaterialization( + [](OpBuilder &builder, TypeRange resultTypes, ValueRange inputs, + Location loc, Type originalType) -> SmallVector { return {}; }); // Assemble patterns. RewritePatternSet patterns(context); From e724226da753f10fd36fbb0ea392f04ab0fdbdab Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 25 Oct 2024 12:35:33 +0100 Subject: [PATCH 054/425] [VPlan] Return cost of 0 for VPWidenCastRecipe without underlying value. In some cases, VPWidenCastRecipes are created but not considered in the legacy cost model, including truncates/extends when evaluating a reduction in a smaller type. Return 0 for such casts for now, to avoid divergences between VPlan and legacy cost models. Fixes https://github.com/llvm/llvm-project/issues/113526. 
--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 ++ .../LoopVectorize/X86/cost-model.ll | 65 +++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 0eb4f7c7c88cee7..2080b77157b6ca2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1524,6 +1524,11 @@ void VPWidenCastRecipe::execute(VPTransformState &State) { InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF, VPCostContext &Ctx) const { + // TODO: In some cases, VPWidenCastRecipes are created but not considered in + // the legacy cost model, including truncates/extends when evaluating a + // reduction in a smaller type. + if (!getUnderlyingValue()) + return 0; // Computes the CastContextHint from a recipes that may access memory. auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint { if (VF.isScalar()) diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 73647919aac3602..29e54fabad0c1bb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -1037,6 +1037,71 @@ exit: ret i64 %red.mul } +; Test case for https://github.com/llvm/llvm-project/issues/113526. 
+define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 { +; CHECK-LABEL: @narrowed_reduction( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP:%.*]] to i32 +; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], +; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1> +; CHECK-NEXT: [[TMP6]] = zext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP7]] = zext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i1> +; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i1> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[BIN_RDX]]) +; 
CHECK-NEXT: [[TMP12:%.*]] = zext i1 [[TMP11]] to i32 +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[OR13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[OR13]], 1 +; CHECK-NEXT: [[OR]] = or i32 [[AND]], [[CONV]] +; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[OR_LCSSA]] +; +entry: + %conv = zext i1 %cmp to i32 + br label %loop + +loop: + %iv = phi i32 [ 1, %entry ], [ %inc, %loop ] + %or13 = phi i32 [ 0, %entry ], [ %or, %loop ] + %and = and i32 %or13, 1 + %or = or i32 %and, %conv + %inc = add i32 %iv, 1 + %ec = icmp eq i32 %iv, 0 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %or +} + declare void @llvm.assume(i1 noundef) #0 attributes #0 = { "target-cpu"="penryn" } From 75252e29ea6a0959f3c1670e641a03fc18fc65fa Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 25 Oct 2024 12:40:59 -0700 Subject: [PATCH 055/425] [clang][serialization] Bump `NUM_PREDEF_TYPE_IDS` This fixes a build error caused by 4ac0e7e400fe2a66d1fd5d5d1fa1c899dfb16716. 
--- clang/include/clang/Serialization/ASTBitCodes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 99232fd21357904..3ddbc5fcd26c44f 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1149,7 +1149,7 @@ enum PredefinedTypeIDs { /// /// Type IDs for non-predefined types will start at /// NUM_PREDEF_TYPE_IDs. -const unsigned NUM_PREDEF_TYPE_IDS = 512; +const unsigned NUM_PREDEF_TYPE_IDS = 513; // Ensure we do not overrun the predefined types we reserved // in the enum PredefinedTypeIDs above. From 6c9bbbc818ae8a0d2849dbc1ebd84a220cc27d20 Mon Sep 17 00:00:00 2001 From: vporpo Date: Fri, 25 Oct 2024 12:47:19 -0700 Subject: [PATCH 056/425] [SandboxVec][Legality] Reject non-instructions (#113190) --- .../Vectorize/SandboxVectorizer/Legality.h | 10 +++++++++- .../Vectorize/SandboxVectorizer/Legality.cpp | 18 +++++++++++++++++- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 2 +- .../SandboxVectorizer/LegalityTest.cpp | 13 ++++++++++++- 4 files changed, 39 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index bcfafd75d4caaf5..d4b0b54375b0267 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -28,6 +28,7 @@ enum class LegalityResultID { /// The reason for vectorizing or not vectorizing. 
enum class ResultReason { + NotInstructions, DiffOpcodes, DiffTypes, }; @@ -46,6 +47,8 @@ struct ToStr { static const char *getVecReason(ResultReason Reason) { switch (Reason) { + case ResultReason::NotInstructions: + return "NotInstructions"; case ResultReason::DiffOpcodes: return "DiffOpcodes"; case ResultReason::DiffTypes: @@ -67,6 +70,10 @@ class LegalityResult { LegalityResult(LegalityResultID ID) : ID(ID) {} friend class LegalityAnalysis; + /// We shouldn't need copies. + LegalityResult(const LegalityResult &) = delete; + LegalityResult &operator=(const LegalityResult &) = delete; + public: virtual ~LegalityResult() {} LegalityResultID getSubclassID() const { return ID; } @@ -90,6 +97,7 @@ class LegalityResultWithReason : public LegalityResult { friend class Pack; // For constructor. public: + ResultReason getReason() const { return Reason; } #ifndef NDEBUG void print(raw_ostream &OS) const override { LegalityResult::print(OS); @@ -138,7 +146,7 @@ class LegalityAnalysis { } /// Checks if it's legal to vectorize the instructions in \p Bndl. /// \Returns a LegalityResult object owned by LegalityAnalysis. 
- LegalityResult &canVectorize(ArrayRef Bndl); + const LegalityResult &canVectorize(ArrayRef Bndl); }; } // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index 0e2cd83c37b0cd0..f1c4577cece78af 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -7,11 +7,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" +#include "llvm/SandboxIR/Instruction.h" +#include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" namespace llvm::sandboxir { +#define DEBUG_TYPE "SBVec:Legality" + #ifndef NDEBUG void LegalityResult::dump() const { print(dbgs()); @@ -26,7 +30,19 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( return std::nullopt; } -LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { +static void dumpBndl(ArrayRef Bndl) { + for (auto *V : Bndl) + dbgs() << *V << "\n"; +} + +const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { + // If Bndl contains values other than instructions, we need to Pack. 
+ if (any_of(Bndl, [](auto *V) { return !isa(V); })) { + LLVM_DEBUG(dbgs() << "Not vectorizing: Not Instructions:\n"; + dumpBndl(Bndl);); + return createLegalityResult(ResultReason::NotInstructions); + } + if (auto ReasonOpt = notVectorizableBasedOnOpcodesAndTypes(Bndl)) return createLegalityResult(*ReasonOpt); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index f11420e47f3e1f9..ede41cd661b559a 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -40,7 +40,7 @@ static SmallVector getOperand(ArrayRef Bndl, } void BottomUpVec::vectorizeRec(ArrayRef Bndl) { - auto LegalityRes = Legality.canVectorize(Bndl); + const auto &LegalityRes = Legality.canVectorize(Bndl); switch (LegalityRes.getSubclassID()) { case LegalityResultID::Widen: { auto *I = cast(Bndl[0]); diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 76e5a5ce5aed920..56c6bf5f1ef1f5c 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -52,8 +52,16 @@ define void @foo(ptr %ptr) { auto *St1 = cast(&*It++); sandboxir::LegalityAnalysis Legality; - auto Result = Legality.canVectorize({St0, St1}); + const auto &Result = Legality.canVectorize({St0, St1}); EXPECT_TRUE(isa(Result)); + + { + // Check NotInstructions + auto &Result = Legality.canVectorize({F, St0}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::NotInstructions); + } } #ifndef NDEBUG @@ -68,6 +76,9 @@ TEST_F(LegalityTest, LegalityResultDump) { sandboxir::LegalityAnalysis Legality; EXPECT_TRUE( Matches(Legality.createLegalityResult(), "Widen")); + 
EXPECT_TRUE(Matches(Legality.createLegalityResult( + sandboxir::ResultReason::NotInstructions), + "Pack Reason: NotInstructions")); EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffOpcodes), "Pack Reason: DiffOpcodes")); From eb9f4756bc3daaa4b19f4f46521dc05180814de4 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Fri, 25 Oct 2024 12:52:31 -0700 Subject: [PATCH 057/425] Revert "[SandboxVec][Legality] Reject non-instructions (#113190)" This reverts commit 6c9bbbc818ae8a0d2849dbc1ebd84a220cc27d20. --- .../Vectorize/SandboxVectorizer/Legality.h | 10 +--------- .../Vectorize/SandboxVectorizer/Legality.cpp | 18 +----------------- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 2 +- .../SandboxVectorizer/LegalityTest.cpp | 13 +------------ 4 files changed, 4 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index d4b0b54375b0267..bcfafd75d4caaf5 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -28,7 +28,6 @@ enum class LegalityResultID { /// The reason for vectorizing or not vectorizing. enum class ResultReason { - NotInstructions, DiffOpcodes, DiffTypes, }; @@ -47,8 +46,6 @@ struct ToStr { static const char *getVecReason(ResultReason Reason) { switch (Reason) { - case ResultReason::NotInstructions: - return "NotInstructions"; case ResultReason::DiffOpcodes: return "DiffOpcodes"; case ResultReason::DiffTypes: @@ -70,10 +67,6 @@ class LegalityResult { LegalityResult(LegalityResultID ID) : ID(ID) {} friend class LegalityAnalysis; - /// We shouldn't need copies. 
- LegalityResult(const LegalityResult &) = delete; - LegalityResult &operator=(const LegalityResult &) = delete; - public: virtual ~LegalityResult() {} LegalityResultID getSubclassID() const { return ID; } @@ -97,7 +90,6 @@ class LegalityResultWithReason : public LegalityResult { friend class Pack; // For constructor. public: - ResultReason getReason() const { return Reason; } #ifndef NDEBUG void print(raw_ostream &OS) const override { LegalityResult::print(OS); @@ -146,7 +138,7 @@ class LegalityAnalysis { } /// Checks if it's legal to vectorize the instructions in \p Bndl. /// \Returns a LegalityResult object owned by LegalityAnalysis. - const LegalityResult &canVectorize(ArrayRef Bndl); + LegalityResult &canVectorize(ArrayRef Bndl); }; } // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index f1c4577cece78af..0e2cd83c37b0cd0 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -7,15 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" -#include "llvm/SandboxIR/Instruction.h" -#include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" namespace llvm::sandboxir { -#define DEBUG_TYPE "SBVec:Legality" - #ifndef NDEBUG void LegalityResult::dump() const { print(dbgs()); @@ -30,19 +26,7 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( return std::nullopt; } -static void dumpBndl(ArrayRef Bndl) { - for (auto *V : Bndl) - dbgs() << *V << "\n"; -} - -const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { - // If Bndl contains values other than instructions, we need to Pack. 
- if (any_of(Bndl, [](auto *V) { return !isa(V); })) { - LLVM_DEBUG(dbgs() << "Not vectorizing: Not Instructions:\n"; - dumpBndl(Bndl);); - return createLegalityResult(ResultReason::NotInstructions); - } - +LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { if (auto ReasonOpt = notVectorizableBasedOnOpcodesAndTypes(Bndl)) return createLegalityResult(*ReasonOpt); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index ede41cd661b559a..f11420e47f3e1f9 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -40,7 +40,7 @@ static SmallVector getOperand(ArrayRef Bndl, } void BottomUpVec::vectorizeRec(ArrayRef Bndl) { - const auto &LegalityRes = Legality.canVectorize(Bndl); + auto LegalityRes = Legality.canVectorize(Bndl); switch (LegalityRes.getSubclassID()) { case LegalityResultID::Widen: { auto *I = cast(Bndl[0]); diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 56c6bf5f1ef1f5c..76e5a5ce5aed920 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -52,16 +52,8 @@ define void @foo(ptr %ptr) { auto *St1 = cast(&*It++); sandboxir::LegalityAnalysis Legality; - const auto &Result = Legality.canVectorize({St0, St1}); + auto Result = Legality.canVectorize({St0, St1}); EXPECT_TRUE(isa(Result)); - - { - // Check NotInstructions - auto &Result = Legality.canVectorize({F, St0}); - EXPECT_TRUE(isa(Result)); - EXPECT_EQ(cast(Result).getReason(), - sandboxir::ResultReason::NotInstructions); - } } #ifndef NDEBUG @@ -76,9 +68,6 @@ TEST_F(LegalityTest, LegalityResultDump) { sandboxir::LegalityAnalysis Legality; EXPECT_TRUE( 
Matches(Legality.createLegalityResult(), "Widen")); - EXPECT_TRUE(Matches(Legality.createLegalityResult( - sandboxir::ResultReason::NotInstructions), - "Pack Reason: NotInstructions")); EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffOpcodes), "Pack Reason: DiffOpcodes")); From 1540f772c793b3a29ae5d57e99456ec5d7ef4b39 Mon Sep 17 00:00:00 2001 From: Vasileios Porpodas Date: Fri, 25 Oct 2024 12:53:26 -0700 Subject: [PATCH 058/425] Reapply "[SandboxVec][Legality] Reject non-instructions (#113190)" This reverts commit eb9f4756bc3daaa4b19f4f46521dc05180814de4. --- .../Vectorize/SandboxVectorizer/Legality.h | 10 +++++++++- .../Vectorize/SandboxVectorizer/Legality.cpp | 20 ++++++++++++++++++- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 2 +- .../SandboxVectorizer/LegalityTest.cpp | 13 +++++++++++- 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index bcfafd75d4caaf5..d4b0b54375b0267 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -28,6 +28,7 @@ enum class LegalityResultID { /// The reason for vectorizing or not vectorizing. enum class ResultReason { + NotInstructions, DiffOpcodes, DiffTypes, }; @@ -46,6 +47,8 @@ struct ToStr { static const char *getVecReason(ResultReason Reason) { switch (Reason) { + case ResultReason::NotInstructions: + return "NotInstructions"; case ResultReason::DiffOpcodes: return "DiffOpcodes"; case ResultReason::DiffTypes: @@ -67,6 +70,10 @@ class LegalityResult { LegalityResult(LegalityResultID ID) : ID(ID) {} friend class LegalityAnalysis; + /// We shouldn't need copies. 
+ LegalityResult(const LegalityResult &) = delete; + LegalityResult &operator=(const LegalityResult &) = delete; + public: virtual ~LegalityResult() {} LegalityResultID getSubclassID() const { return ID; } @@ -90,6 +97,7 @@ class LegalityResultWithReason : public LegalityResult { friend class Pack; // For constructor. public: + ResultReason getReason() const { return Reason; } #ifndef NDEBUG void print(raw_ostream &OS) const override { LegalityResult::print(OS); @@ -138,7 +146,7 @@ class LegalityAnalysis { } /// Checks if it's legal to vectorize the instructions in \p Bndl. /// \Returns a LegalityResult object owned by LegalityAnalysis. - LegalityResult &canVectorize(ArrayRef Bndl); + const LegalityResult &canVectorize(ArrayRef Bndl); }; } // namespace llvm::sandboxir diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index 0e2cd83c37b0cd0..e4546c2f98113ee 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -7,11 +7,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" +#include "llvm/SandboxIR/Instruction.h" +#include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" namespace llvm::sandboxir { +#define DEBUG_TYPE "SBVec:Legality" + #ifndef NDEBUG void LegalityResult::dump() const { print(dbgs()); @@ -26,7 +30,21 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( return std::nullopt; } -LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { +#ifndef NDEBUG +static void dumpBndl(ArrayRef Bndl) { + for (auto *V : Bndl) + dbgs() << *V << "\n"; +} +#endif // NDEBUG + +const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef Bndl) { + // If Bndl contains values other than instructions, we need to Pack. 
+ if (any_of(Bndl, [](auto *V) { return !isa(V); })) { + LLVM_DEBUG(dbgs() << "Not vectorizing: Not Instructions:\n"; + dumpBndl(Bndl);); + return createLegalityResult(ResultReason::NotInstructions); + } + if (auto ReasonOpt = notVectorizableBasedOnOpcodesAndTypes(Bndl)) return createLegalityResult(*ReasonOpt); diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index f11420e47f3e1f9..ede41cd661b559a 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -40,7 +40,7 @@ static SmallVector getOperand(ArrayRef Bndl, } void BottomUpVec::vectorizeRec(ArrayRef Bndl) { - auto LegalityRes = Legality.canVectorize(Bndl); + const auto &LegalityRes = Legality.canVectorize(Bndl); switch (LegalityRes.getSubclassID()) { case LegalityResultID::Widen: { auto *I = cast(Bndl[0]); diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 76e5a5ce5aed920..56c6bf5f1ef1f5c 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -52,8 +52,16 @@ define void @foo(ptr %ptr) { auto *St1 = cast(&*It++); sandboxir::LegalityAnalysis Legality; - auto Result = Legality.canVectorize({St0, St1}); + const auto &Result = Legality.canVectorize({St0, St1}); EXPECT_TRUE(isa(Result)); + + { + // Check NotInstructions + auto &Result = Legality.canVectorize({F, St0}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::NotInstructions); + } } #ifndef NDEBUG @@ -68,6 +76,9 @@ TEST_F(LegalityTest, LegalityResultDump) { sandboxir::LegalityAnalysis Legality; EXPECT_TRUE( Matches(Legality.createLegalityResult(), "Widen")); + 
EXPECT_TRUE(Matches(Legality.createLegalityResult( + sandboxir::ResultReason::NotInstructions), + "Pack Reason: NotInstructions")); EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffOpcodes), "Pack Reason: DiffOpcodes")); From cfde4fbccf5d8d949a8cade0a4f8ef9b0f47ca73 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 25 Oct 2024 16:46:38 -0400 Subject: [PATCH 059/425] [libc++] Remove obsolete Solaris and Newlib support for locales (#113721) The solaris header file doesn't even exist, so that's definitely dead code. The newlib header is empty, which means that localization can't work on that platform. If someone is using libc++ with Newlib, they must be providing LIBCXX_HAS_NO_LOCALIZATION today for anything to work, so that header is basically dead code as well. --- libcxx/include/CMakeLists.txt | 1 - libcxx/include/__locale_dir/locale_base_api.h | 4 ---- libcxx/include/__locale_dir/locale_base_api/newlib.h | 12 ------------ libcxx/include/module.modulemap | 1 - 4 files changed, 18 deletions(-) delete mode 100644 libcxx/include/__locale_dir/locale_base_api/newlib.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 63aa74e09bb1a27..506ed721d0843ec 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -497,7 +497,6 @@ set(files __locale_dir/locale_base_api/fuchsia.h __locale_dir/locale_base_api/ibm.h __locale_dir/locale_base_api/musl.h - __locale_dir/locale_base_api/newlib.h __locale_dir/locale_base_api/openbsd.h __locale_dir/locale_base_api/win32.h __locale_dir/locale_guard.h diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index 8c000c558c52793..eab7fa8bf62faec 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -15,10 +15,6 @@ # include <__locale_dir/locale_base_api/ibm.h> #elif defined(__ANDROID__) # include <__locale_dir/locale_base_api/android.h> 
-#elif defined(__sun__) -# include <__locale_dir/locale_base_api/solaris.h> -#elif defined(_NEWLIB_VERSION) -# include <__locale_dir/locale_base_api/newlib.h> #elif defined(__OpenBSD__) # include <__locale_dir/locale_base_api/openbsd.h> #elif defined(__Fuchsia__) diff --git a/libcxx/include/__locale_dir/locale_base_api/newlib.h b/libcxx/include/__locale_dir/locale_base_api/newlib.h deleted file mode 100644 index 7da10e5889843dd..000000000000000 --- a/libcxx/include/__locale_dir/locale_base_api/newlib.h +++ /dev/null @@ -1,12 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H -#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H - -#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_NEWLIB_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index c79070c318759db..f92e8bf5fc9aba5 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1451,7 +1451,6 @@ module std [system] { textual header "__locale_dir/locale_base_api/fuchsia.h" textual header "__locale_dir/locale_base_api/ibm.h" textual header "__locale_dir/locale_base_api/musl.h" - textual header "__locale_dir/locale_base_api/newlib.h" textual header "__locale_dir/locale_base_api/openbsd.h" textual header "__locale_dir/locale_base_api/win32.h" } From 1bc2cd98c58a1059170dc38697c7a29a8e21160b Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 25 Oct 2024 13:52:51 -0700 Subject: [PATCH 060/425] [WebAssembly] Enable nontrapping-fptoint and bulk-memory by default. 
(#112049) We were prepared to enable these features [back in February], but they got pulled for what appear to be unrelated reasons. So let's have another try at enabling them! Another motivation here is that it'd be convenient for the [Lime1 proposal] if "lime1" is close to a subset of "generic" (missing only for extended-const). [back in February]: https://github.com/WebAssembly/tool-conventions/issues/158#issuecomment-1931119512 [Lime1 proposal]: https://github.com/llvm/llvm-project/pull/112035 --- clang/docs/ReleaseNotes.rst | 9 ++++++ clang/lib/Basic/Targets/WebAssembly.cpp | 4 +-- .../test/Preprocessor/wasm-target-features.c | 4 +-- lld/test/wasm/custom-section-name.ll | 2 +- lld/test/wasm/data-segments.ll | 2 +- lld/test/wasm/lto/Inputs/libcall-archive.ll | 4 ++- lld/test/wasm/lto/libcall-archive.ll | 4 ++- lld/test/wasm/lto/stub-library-libcall.s | 4 +-- llvm/docs/ReleaseNotes.md | 9 ++++++ llvm/lib/Target/WebAssembly/WebAssembly.td | 3 +- .../WebAssemblyFixFunctionBitcasts.cpp | 2 ++ .../WebAssembly/WebAssemblyTargetMachine.cpp | 29 +++++++++++++++---- .../WebAssembly/cfg-stackify-eh-legacy.ll | 10 +++---- .../WebAssembly/target-features-cpus.ll | 8 ++++- .../WebAssembly/extern-functype-intrinsic.ll | 4 +-- llvm/test/MC/WebAssembly/libcall.ll | 2 +- 16 files changed, 74 insertions(+), 26 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 170c4cc280537f9..6a95337815174bc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -690,6 +690,15 @@ NetBSD Support WebAssembly Support ^^^^^^^^^^^^^^^^^^^ +The default target CPU, "generic", now enables the `-mnontrapping-fptoint` +and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations] +and [Non-trapping float-to-int Conversions] language features, which are +[widely implemented in engines]. 
+ +[Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md +[Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md +[widely implemented in engines]: https://webassembly.org/features/ + AVR Support ^^^^^^^^^^^ diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index 4c9df6007b78231..0b380bdf835ffbd 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -154,20 +154,20 @@ bool WebAssemblyTargetInfo::initFeatureMap( llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const { auto addGenericFeatures = [&]() { + Features["bulk-memory"] = true; Features["multivalue"] = true; Features["mutable-globals"] = true; + Features["nontrapping-fptoint"] = true; Features["reference-types"] = true; Features["sign-ext"] = true; }; auto addBleedingEdgeFeatures = [&]() { addGenericFeatures(); Features["atomics"] = true; - Features["bulk-memory"] = true; Features["exception-handling"] = true; Features["extended-const"] = true; Features["fp16"] = true; Features["multimemory"] = true; - Features["nontrapping-fptoint"] = true; Features["tail-call"] = true; Features["wide-arithmetic"] = true; setSIMDLevel(Features, RelaxedSIMD, true); diff --git a/clang/test/Preprocessor/wasm-target-features.c b/clang/test/Preprocessor/wasm-target-features.c index 14d2fbf4423d32b..71b7cf6a5d43cc1 100644 --- a/clang/test/Preprocessor/wasm-target-features.c +++ b/clang/test/Preprocessor/wasm-target-features.c @@ -163,8 +163,10 @@ // RUN: -target wasm64-unknown-unknown -mcpu=generic \ // RUN: | FileCheck %s -check-prefix=GENERIC-INCLUDE // +// GENERIC-INCLUDE-DAG: #define __wasm_bulk_memory__ 1{{$}} // GENERIC-INCLUDE-DAG: #define __wasm_multivalue__ 1{{$}} // GENERIC-INCLUDE-DAG: #define 
__wasm_mutable_globals__ 1{{$}} +// GENERIC-INCLUDE-DAG: #define __wasm_nontrapping_fptoint__ 1{{$}} // GENERIC-INCLUDE-DAG: #define __wasm_reference_types__ 1{{$}} // GENERIC-INCLUDE-DAG: #define __wasm_sign_ext__ 1{{$}} // @@ -176,12 +178,10 @@ // RUN: | FileCheck %s -check-prefix=GENERIC // // GENERIC-NOT: #define __wasm_atomics__ 1{{$}} -// GENERIC-NOT: #define __wasm_bulk_memory__ 1{{$}} // GENERIC-NOT: #define __wasm_exception_handling__ 1{{$}} // GENERIC-NOT: #define __wasm_extended_const__ 1{{$}} // GENERIC-NOT: #define __wasm__fp16__ 1{{$}} // GENERIC-NOT: #define __wasm_multimemory__ 1{{$}} -// GENERIC-NOT: #define __wasm_nontrapping_fptoint__ 1{{$}} // GENERIC-NOT: #define __wasm_relaxed_simd__ 1{{$}} // GENERIC-NOT: #define __wasm_simd128__ 1{{$}} // GENERIC-NOT: #define __wasm_tail_call__ 1{{$}} diff --git a/lld/test/wasm/custom-section-name.ll b/lld/test/wasm/custom-section-name.ll index b860ef5a83e8364..8799fbf36056d1d 100644 --- a/lld/test/wasm/custom-section-name.ll +++ b/lld/test/wasm/custom-section-name.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o %t.o +; RUN: llc -filetype=obj -mattr=-bulk-memory %s -o %t.o ; RUN: wasm-ld -no-gc-sections --no-entry -o %t.wasm %t.o ; RUN: obj2yaml %t.wasm | FileCheck %s --check-prefixes=CHECK,NO-BSS ; RUN: wasm-ld -no-gc-sections --no-entry --import-memory -o %t.bss.wasm %t.o diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll index 670ac3c1f373faf..41868a0b2b50b6f 100644 --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -1,4 +1,4 @@ -; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.o -mattr=+atomics +; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.o -mattr=+atomics,-bulk-memory ; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.bulk-mem.o -mattr=+bulk-memory ; RUN: llc --mtriple=wasm64-unknown-unknown -filetype=obj %s -o %t.bulk-mem64.o -mattr=+bulk-memory ; RUN: llc 
--mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.bulk-mem.o -mattr=+atomics,+bulk-memory diff --git a/lld/test/wasm/lto/Inputs/libcall-archive.ll b/lld/test/wasm/lto/Inputs/libcall-archive.ll index 9d05efdeae0806e..7d8c34196dfe49a 100644 --- a/lld/test/wasm/lto/Inputs/libcall-archive.ll +++ b/lld/test/wasm/lto/Inputs/libcall-archive.ll @@ -1,6 +1,8 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" target triple = "wasm32-unknown-unknown" -define void @memcpy() { +define void @memcpy() #0 { ret void } + +attributes #0 = { "target-features"="-bulk-memory" } diff --git a/lld/test/wasm/lto/libcall-archive.ll b/lld/test/wasm/lto/libcall-archive.ll index 2f785b98976ec88..5c46d2f7ed78381 100644 --- a/lld/test/wasm/lto/libcall-archive.ll +++ b/lld/test/wasm/lto/libcall-archive.ll @@ -8,7 +8,7 @@ target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20" target triple = "wasm32-unknown-unknown" -define void @_start(ptr %a, ptr %b) { +define void @_start(ptr %a, ptr %b) #0 { entry: call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr %b, i64 1024, i1 false) ret void @@ -16,6 +16,8 @@ entry: declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) +attributes #0 = { "target-features"="-bulk-memory" } + ; CHECK: - Type: CUSTOM ; CHECK-NEXT: Name: name ; CHECK-NEXT: FunctionNames: diff --git a/lld/test/wasm/lto/stub-library-libcall.s b/lld/test/wasm/lto/stub-library-libcall.s index ce88a32dd99dc7b..d65983c0cf5bf52 100644 --- a/lld/test/wasm/lto/stub-library-libcall.s +++ b/lld/test/wasm/lto/stub-library-libcall.s @@ -2,7 +2,7 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t_main.o %t/main.s # RUN: llvm-as %S/Inputs/foo.ll -o %t_foo.o # RUN: llvm-as %S/Inputs/libcall.ll -o %t_libcall.o -# RUN: wasm-ld %t_main.o %t_libcall.o %t_foo.o %p/Inputs/stub.so -o %t.wasm +# RUN: wasm-ld -mllvm -mattr=-bulk-memory %t_main.o %t_libcall.o %t_foo.o %p/Inputs/stub.so -o %t.wasm # RUN: 
obj2yaml %t.wasm | FileCheck %s # The function `func_with_libcall` will generate an undefined reference to @@ -12,7 +12,7 @@ # If %t_foo.o is not included in the link we get an undefined symbol reported # to the dependency of memcpy on the foo export: -# RUN: not wasm-ld %t_main.o %t_libcall.o %p/Inputs/stub.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING %s +# RUN: not wasm-ld -mllvm -mattr=-bulk-memory %t_main.o %t_libcall.o %p/Inputs/stub.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING %s # MISSING: stub.so: undefined symbol: foo. Required by memcpy #--- main.s diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index be51b0af56ddbf7..e3d93f0dfd0ec55 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -180,6 +180,15 @@ Changes to the RISC-V Backend Changes to the WebAssembly Backend ---------------------------------- +The default target CPU, "generic", now enables the `-mnontrapping-fptoint` +and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations] +and [Non-trapping float-to-int Conversions] language features, which are +[widely implemented in engines]. + +[Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md +[Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md +[widely implemented in engines]: https://webassembly.org/features/ + Changes to the Windows Target ----------------------------- diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td index 37d99690c25b1fa..88628f2a7935453 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.td +++ b/llvm/lib/Target/WebAssembly/WebAssembly.td @@ -114,7 +114,8 @@ def : ProcessorModel<"mvp", NoSchedModel, []>; // consideration given to available support in relevant engines and tools, and // the importance of the features. 
def : ProcessorModel<"generic", NoSchedModel, - [FeatureMultivalue, FeatureMutableGlobals, + [FeatureBulkMemory, FeatureMultivalue, + FeatureMutableGlobals, FeatureNontrappingFPToInt, FeatureReferenceTypes, FeatureSignExt]>; // Latest and greatest experimental version of WebAssembly. Bugs included! diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index a3cc9bae470859b..7c3e8d18ad276bb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -111,6 +111,7 @@ static Function *createWrapper(Function *F, FunctionType *Ty) { Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage, F->getName() + "_bitcast", M); + Wrapper->setAttributes(F->getAttributes()); BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper); const DataLayout &DL = BB->getDataLayout(); @@ -201,6 +202,7 @@ static Function *createWrapper(Function *F, FunctionType *Ty) { Wrapper->eraseFromParent(); Wrapper = Function::Create(Ty, Function::PrivateLinkage, F->getName() + "_bitcast_invalid", M); + Wrapper->setAttributes(F->getAttributes()); BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper); new UnreachableInst(M->getContext(), BB); Wrapper->setName(F->getName() + "_bitcast_invalid"); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 3fe6ccf1c608e1e..83cd57d0bbdd557 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -233,13 +233,30 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { private: FeatureBitset coalesceFeatures(const Module &M) { - FeatureBitset Features = - WasmTM - ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()), - std::string(WasmTM->getTargetFeatureString())) - 
->getFeatureBits(); - for (auto &F : M) + // Union the features of all defined functions. Start with an empty set, so + // that if a feature is disabled in every function, we'll compute it as + // disabled. If any function lacks a target-features attribute, it'll + // default to the target CPU from the `TargetMachine`. + FeatureBitset Features; + bool AnyDefinedFuncs = false; + for (auto &F : M) { + if (F.isDeclaration()) + continue; + Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits(); + AnyDefinedFuncs = true; + } + + // If we have no defined functions, use the target CPU from the + // `TargetMachine`. + if (!AnyDefinedFuncs) { + Features = + WasmTM + ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()), + std::string(WasmTM->getTargetFeatureString())) + ->getFeatureBits(); + } + return Features; } diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll index cef92f459e4aa37..24a08267db6fbf7 100644 --- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll +++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll @@ -1,9 +1,9 @@ ; REQUIRES: asserts -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s --check-prefix=NOOPT -; RUN: llc < %s 
-disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT -; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,bulk-memory | FileCheck %s +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,bulk-memory +; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory | FileCheck %s --check-prefix=NOOPT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT +; RUN: llc < %s 
-disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS target triple = "wasm32-unknown-unknown" diff --git a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll index 77d1564409f78cc..ba10dd94a9838dc 100644 --- a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll +++ b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll @@ -13,7 +13,10 @@ target triple = "wasm32-unknown-unknown" ; generic: +multivalue, +mutable-globals, +reference-types, +sign-ext ; GENERIC-LABEL: .custom_section.target_features,"",@ -; GENERIC-NEXT: .int8 4 +; GENERIC-NEXT: .int8 6 +; GENERIC-NEXT: .int8 43 +; GENERIC-NEXT: .int8 11 +; GENERIC-NEXT: .ascii "bulk-memory" ; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 10 ; GENERIC-NEXT: .ascii "multivalue" @@ -21,6 +24,9 @@ target triple = "wasm32-unknown-unknown" ; GENERIC-NEXT: .int8 15 ; GENERIC-NEXT: .ascii "mutable-globals" ; GENERIC-NEXT: .int8 43 +; GENERIC-NEXT: .int8 19 +; GENERIC-NEXT: .ascii "nontrapping-fptoint" +; GENERIC-NEXT: .int8 43 ; GENERIC-NEXT: .int8 15 ; GENERIC-NEXT: .ascii "reference-types" ; GENERIC-NEXT: .int8 43 diff --git a/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll b/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll index 320b65356ba9f37..b321c0c82ad4d31 100644 --- a/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll +++ b/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll @@ -1,5 +1,5 @@ -; RUN: llc %s -o - | FileCheck %s -; RUN: llc %s -o - | llvm-mc -triple=wasm32-unknown-unknown | FileCheck %s +; RUN: llc %s -mattr=-bulk-memory -o - | FileCheck %s +; RUN: llc %s -mattr=-bulk-memory -o - | llvm-mc -triple=wasm32-unknown-unknown | FileCheck %s ; ModuleID = 'test.c' 
source_filename = "test.c" diff --git a/llvm/test/MC/WebAssembly/libcall.ll b/llvm/test/MC/WebAssembly/libcall.ll index 8b81f150da892aa..ffd32abe2345bc7 100644 --- a/llvm/test/MC/WebAssembly/libcall.ll +++ b/llvm/test/MC/WebAssembly/libcall.ll @@ -1,4 +1,4 @@ -; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s +; RUN: llc -filetype=obj -mattr=-bulk-memory %s -o - | obj2yaml | FileCheck %s target triple = "wasm32-unknown-unknown" From a93c952bab3569c84ae0f9619533f7d7c2477a41 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Fri, 25 Oct 2024 14:21:32 -0700 Subject: [PATCH 061/425] [rtsan][NFC] Documentation of suppression flag (#112727) --- clang/docs/RealtimeSanitizer.rst | 68 ++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/clang/docs/RealtimeSanitizer.rst b/clang/docs/RealtimeSanitizer.rst index 103842e055db700..b09162cd99f450d 100644 --- a/clang/docs/RealtimeSanitizer.rst +++ b/clang/docs/RealtimeSanitizer.rst @@ -183,6 +183,10 @@ A **partial** list of flags RealtimeSanitizer respects: - ``true`` - boolean - If set, use the symbolizer to turn virtual addresses to file/line locations. If false, can greatly speed up the error reporting. + * - ``suppressions`` + - "" + - path + - If set to a valid suppressions file, will suppress issue reporting. See details in "Disabling", below. Some issues with flags can be debugged using the ``verbosity=$NUM`` flag: @@ -194,12 +198,43 @@ Some issues with flags can be debugged using the ``verbosity=$NUM`` flag: misspelled_flag ... -Disabling ---------- +Disabling and suppressing +------------------------- -In some circumstances, you may want to suppress error reporting in a specific scope. +There are multiple ways to disable error reporting when using RealtimeSanitizer. -In C++, this is achieved via ``__rtsan::ScopedDisabler``. Within the scope where the ``ScopedDisabler`` object is instantiated, all sanitizer error reports are suppressed. 
This suppression applies to the current scope as well as all invoked functions, including any functions called transitively. +In general, ``ScopedDisabler`` should be preferred, as it is the most performant. + +.. list-table:: Suppression methods + :widths: 30 15 15 10 70 + :header-rows: 1 + + * - Method + - Specified at? + - Scope + - Run-time cost + - Description + * - ``ScopedDisabler`` + - Compile-time + - Stack + - Very low + - Violations are ignored for the lifetime of the ``ScopedDisabler`` object. + * - ``function-name-matches`` suppression + - Run-time + - Single function + - Medium + - Suppresses intercepted and ``[[clang::blocking]]`` function calls by name. + * - ``call-stack-contains`` suppression + - Run-time + - Stack + - High + - Suppresses any stack trace containing the specified pattern. + + +``ScopedDisabler`` +################## + +At compile time, RealtimeSanitizer may be disabled using ``__rtsan::ScopedDisabler``. RTSan ignores any errors originating within the ``ScopedDisabler`` instance variable scope. .. code-block:: c++ @@ -233,6 +268,31 @@ In C, you can use the ``__rtsan_disable()`` and ``rtsan_enable()`` functions to Each call to ``__rtsan_disable()`` must be paired with a subsequent call to ``__rtsan_enable()`` to restore normal sanitizer functionality. If a corresponding ``rtsan_enable()`` call is not made, the behavior is undefined. +Suppression file +################ + +At run-time, suppressions may be specified using a suppressions file passed in ``RTSAN_OPTIONS``. Run-time suppression may be useful if the source cannot be changed. + +.. code-block:: console + + > cat suppressions.supp + call-stack-contains:MallocViolation + call-stack-contains:std::*vector + function-name-matches:free + function-name-matches:CustomMarkedBlocking* + > RTSAN_OPTIONS="suppressions=suppressions.supp" ./a.out + ... + +Suppressions specified in this file are one of two flavors. 
+ +``function-name-matches`` suppresses reporting of any intercepted library call, or function marked ``[[clang::blocking]]`` by name. If, for instance, you know that ``malloc`` is real-time safe on your system, you can disable the check for it via ``function-name-matches:malloc``. + +``call-stack-contains`` suppresses reporting of errors in any stack that contains a string matching the pattern specified. For example, suppressing error reporting of any non-real-time-safe behavior in ``std::vector`` may be specified as ``call-stack-contains:std::*vector``. You must include symbols in your build for this method to be effective; unsymbolicated stack traces cannot be matched. ``call-stack-contains`` has the highest run-time cost of any method of suppression. + +Patterns may be exact matches or "regex-light" patterns containing special characters such as ``^$*``. + +The number of potential errors suppressed via this method may be seen on exit when using the ``print_stats_on_exit`` flag. + Compile-time sanitizer detection -------------------------------- From 590b1e31546572b62040066f90a35893a1b64f29 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Fri, 25 Oct 2024 15:00:07 -0700 Subject: [PATCH 062/425] [clang][modules] Only serialize info for locally-included headers (#113718) I noticed that some PCM files contain `HeaderFileInfo` for headers only included in a dependent PCM file, which is wasteful. This patch changes the logic to only write headers that are included locally. This makes the PCM files smaller and saves some superfluous deserialization of `HeaderFileInfo` triggered by `Preprocessor::alreadyIncluded()`. 
--- clang/include/clang/Lex/Preprocessor.h | 2 +- clang/lib/Lex/HeaderSearch.cpp | 1 - clang/lib/Serialization/ASTWriter.cpp | 6 +++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index f3f4de044fc41a1..38a527d2324ffe8 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -1490,7 +1490,7 @@ class Preprocessor { /// Mark the file as included. /// Returns true if this is the first time the file was included. bool markIncluded(FileEntryRef File) { - HeaderInfo.getFileInfo(File); + HeaderInfo.getFileInfo(File).IsLocallyIncluded = true; return IncludedFiles.insert(File).second; } diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 8826ab449df4930..052be1395161d4f 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -1582,7 +1582,6 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP, } } - FileInfo.IsLocallyIncluded = true; IsFirstIncludeOfFile = PP.markIncluded(File); return true; } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index b576822fa704c89..c09a41f4d1403cf 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2163,8 +2163,8 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { continue; // We have no information on this being a header file. if (!HFI->isCompilingModuleHeader && HFI->isModuleHeader) continue; // Header file info is tracked by the owning module file. - if (!HFI->isCompilingModuleHeader && !PP->alreadyIncluded(*File)) - continue; // Non-modular header not included is not needed. + if (!HFI->isCompilingModuleHeader && !HFI->IsLocallyIncluded) + continue; // Header file info is tracked by the including module file. // Massage the file path into an appropriate form. 
StringRef Filename = File->getName(); @@ -2176,7 +2176,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) { SavedStrings.push_back(Filename.data()); } - bool Included = PP->alreadyIncluded(*File); + bool Included = HFI->IsLocallyIncluded || PP->alreadyIncluded(*File); HeaderFileInfoTrait::key_type Key = { Filename, File->getSize(), getTimestampForOutput(*File) From 5f0a62817fc4e68e793e6d5e5c5fb9b614a1c8bf Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 25 Oct 2024 15:07:48 -0700 Subject: [PATCH 063/425] [RISCV] Remove duplicate Smrnmi CSR test cases. NFC ed6ddffb583beb450c4b0e1747ccd14f7e063105 added tests that already existed. --- llvm/test/MC/RISCV/machine-csr-names.s | 60 -------------------------- 1 file changed, 60 deletions(-) diff --git a/llvm/test/MC/RISCV/machine-csr-names.s b/llvm/test/MC/RISCV/machine-csr-names.s index d509f9eadeb5e19..8cfdf7ee116cee6 100644 --- a/llvm/test/MC/RISCV/machine-csr-names.s +++ b/llvm/test/MC/RISCV/machine-csr-names.s @@ -1913,66 +1913,6 @@ csrrs t1, mhpmcounter31, zero csrrs t2, 0xB1F, zero -###################################### -# Machine Counter Setup -###################################### -# mnscratch -# name -# CHECK-INST: csrrs t1, mnscratch, zero -# CHECK-ENC: encoding: [0x73,0x23,0x00,0x74] -# CHECK-INST-ALIAS: csrr t1, mnscratch -# uimm12 -# CHECK-INST: csrrs t2, mnscratch, zero -# CHECK-ENC: encoding: [0xf3,0x23,0x00,0x74] -# CHECK-INST-ALIAS: csrr t2, mnscratch -# name -csrrs t1, mnscratch, zero -# uimm12 -csrrs t2, 0x740, zero - -# mnepc -# name -# CHECK-INST: csrrs t1, mnepc, zero -# CHECK-ENC: encoding: [0x73,0x23,0x10,0x74] -# CHECK-INST-ALIAS: csrr t1, mnepc -# uimm12 -# CHECK-INST: csrrs t2, mnepc, zero -# CHECK-ENC: encoding: [0xf3,0x23,0x10,0x74] -# CHECK-INST-ALIAS: csrr t2, mnepc -# name -csrrs t1, mnepc, zero -# uimm12 -csrrs t2, 0x741, zero - -# mncause -# name -# CHECK-INST: csrrs t1, mncause, zero -# CHECK-ENC: encoding: [0x73,0x23,0x20,0x74] -# CHECK-INST-ALIAS: csrr t1, mncause 
-# uimm12 -# CHECK-INST: csrrs t2, mncause, zero -# CHECK-ENC: encoding: [0xf3,0x23,0x20,0x74] -# CHECK-INST-ALIAS: csrr t2, mncause -# name -csrrs t1, mncause, zero -# uimm12 -csrrs t2, 0x742, zero - -# mnstatus -# name -# CHECK-INST: csrrs t1, mnstatus, zero -# CHECK-ENC: encoding: [0x73,0x23,0x40,0x74] -# CHECK-INST-ALIAS: csrr t1, mnstatus -# uimm12 -# CHECK-INST: csrrs t2, mnstatus, zero -# CHECK-ENC: encoding: [0xf3,0x23,0x40,0x74] -# CHECK-INST-ALIAS: csrr t2, mnstatus -# name -csrrs t1, mnstatus, zero -# uimm12 -csrrs t2, 0x744, zero - - ###################################### # Machine Counter Setup ###################################### From ccc15cd6f52922e83164b44e927870059c168ddf Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 25 Oct 2024 15:35:01 -0700 Subject: [PATCH 064/425] [lldb] Avoid repeated hash lookups (NFC) (#113412) --- .../InstEmulation/UnwindAssemblyInstEmulation.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp index 49edd40544e32ab..1a680d80a9d3d71 100644 --- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp +++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp @@ -461,8 +461,7 @@ size_t UnwindAssemblyInstEmulation::WriteMemory( if (reg_num != LLDB_INVALID_REGNUM && generic_regnum != LLDB_REGNUM_GENERIC_SP) { - if (m_pushed_regs.find(reg_num) == m_pushed_regs.end()) { - m_pushed_regs[reg_num] = addr; + if (m_pushed_regs.try_emplace(reg_num, addr).second) { const int32_t offset = addr - m_initial_sp; m_curr_row->SetRegisterLocationToAtCFAPlusOffset(reg_num, offset, /*can_replace=*/true); @@ -608,8 +607,8 @@ bool UnwindAssemblyInstEmulation::WriteRegister( generic_regnum != LLDB_REGNUM_GENERIC_SP) { switch (context.GetInfoType()) { case EmulateInstruction::eInfoTypeAddress: - if 
(m_pushed_regs.find(reg_num) != m_pushed_regs.end() && - context.info.address == m_pushed_regs[reg_num]) { + if (auto it = m_pushed_regs.find(reg_num); + it != m_pushed_regs.end() && context.info.address == it->second) { m_curr_row->SetRegisterLocationToSame(reg_num, false /*must_replace*/); m_curr_row_modified = true; From 38caf282ab3dfd69cee213b0d4df2f1672e52b38 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 25 Oct 2024 15:39:52 -0700 Subject: [PATCH 065/425] [SHT_LLVM_BB_ADDR_MAP][AsmPrinter] Add none and all options to PGO Map (#111221) This patch adds none and all options to the -pgo-analysis-map flag, which do basically what they say on the tin. The none option is added to enable forcing the pgo-analysis-map by overriding an earlier invocation of the flag. The all option is just added for convenience. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 31 +++++++++++++------ .../basic-block-address-map-pgo-features.ll | 7 ++++- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index bf4c707cca06d5b..2d444f2f970ac16 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -141,18 +141,22 @@ using namespace llvm; // `object::PGOAnalysisMap::Features::decode(PgoAnalysisMapFeatures.getBits())` // succeeds. 
enum class PGOMapFeaturesEnum { + None, FuncEntryCount, BBFreq, BrProb, + All, }; static cl::bits PgoAnalysisMapFeatures( "pgo-analysis-map", cl::Hidden, cl::CommaSeparated, - cl::values(clEnumValN(PGOMapFeaturesEnum::FuncEntryCount, - "func-entry-count", "Function Entry Count"), - clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq", - "Basic Block Frequency"), - clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", - "Branch Probability")), + cl::values( + clEnumValN(PGOMapFeaturesEnum::None, "none", "Disable all options"), + clEnumValN(PGOMapFeaturesEnum::FuncEntryCount, "func-entry-count", + "Function Entry Count"), + clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq", + "Basic Block Frequency"), + clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", "Branch Probability"), + clEnumValN(PGOMapFeaturesEnum::All, "all", "Enable all options")), cl::desc( "Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is " "extracted from PGO related analysis.")); @@ -1367,9 +1371,18 @@ static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) { static llvm::object::BBAddrMap::Features getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) { - return {PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::FuncEntryCount), - PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq), - PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb), + bool NoFeatures = PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::None); + bool AllFeatures = PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::All); + bool FuncEntryCountEnabled = + AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet( + PGOMapFeaturesEnum::FuncEntryCount)); + bool BBFreqEnabled = + AllFeatures || + (!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq)); + bool BrProbEnabled = + AllFeatures || + (!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb)); + return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled, MF.hasBBSections() && NumMBBSectionRanges > 
1}; } diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll index 73fe4f6ffedb0e1..1c3db738a94768b 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll @@ -1,8 +1,10 @@ ; Check the basic block sections labels option -; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC,PGO-NONE +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=none | FileCheck %s --check-prefixes=CHECK,BASIC,PGO-NONE ;; Also verify this holds for all PGO features enabled ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP +; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=all | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP ;; Also verify that pgo extension only includes the enabled feature ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY @@ -93,6 +95,9 @@ declare i32 @__gxx_personality_v0(...) 
; CHECK-NEXT: .byte 4 ;; PGO Analysis Map +; PGO-NONE-NOT: .byte 100 # function entry count +; PGO-NONE-NOT: .ascii "\271\235\376\332\245\200\356\017" # basic block frequency +; PGO-NONE-NOT: .byte 2 # basic block successor count ; PGO-FEC-NEXT: .byte 100 # function entry count ; PGO-BBF-NEXT: .ascii "\271\235\376\332\245\200\356\017" # basic block frequency ; PGO-BRP-NEXT: .byte 2 # basic block successor count From 242ccd2eb1069c817d44545010dfe185a4c3d0b1 Mon Sep 17 00:00:00 2001 From: Gang Chen Date: Fri, 25 Oct 2024 15:52:31 -0700 Subject: [PATCH 066/425] [clang] update the number in no-external-type-id.cppm (#113738) This should fix https://linaro.atlassian.net/browse/LLVM-1411 --- clang/test/Modules/no-external-type-id.cppm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Modules/no-external-type-id.cppm b/clang/test/Modules/no-external-type-id.cppm index 6385f3a8aa00b24..d067e574e72e37b 100644 --- a/clang/test/Modules/no-external-type-id.cppm +++ b/clang/test/Modules/no-external-type-id.cppm @@ -23,7 +23,7 @@ export module b; import a; export int b(); -// CHECK: Date: Fri, 25 Oct 2024 16:08:45 -0700 Subject: [PATCH 067/425] Adding CUFCommon.{h,cpp} for CUF utilities (#113740) --- .../flang/Optimizer/Transforms/CUFCommon.h | 25 +++++++++++++++ flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../Transforms/CUFAddConstructor.cpp | 7 ++--- flang/lib/Optimizer/Transforms/CUFCommon.cpp | 31 +++++++++++++++++++ 4 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 flang/include/flang/Optimizer/Transforms/CUFCommon.h create mode 100644 flang/lib/Optimizer/Transforms/CUFCommon.cpp diff --git a/flang/include/flang/Optimizer/Transforms/CUFCommon.h b/flang/include/flang/Optimizer/Transforms/CUFCommon.h new file mode 100644 index 000000000000000..b88133489df5e24 --- /dev/null +++ b/flang/include/flang/Optimizer/Transforms/CUFCommon.h @@ -0,0 +1,25 @@ +//===-- CUFCommon.h 
-------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_ +#define FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_ + +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/IR/BuiltinOps.h" + +static constexpr llvm::StringRef cudaDeviceModuleName = "cuda_device_mod"; + +namespace cuf { + +/// Retrieve or create the CUDA Fortran GPU module in the given \p mod. +mlir::gpu::GPUModuleOp getOrCreateGPUModule(mlir::ModuleOp mod, + mlir::SymbolTable &symTab); + +} // namespace cuf + +#endif // FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_ diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index d20d3bc4108ce94..9eafa4ec234bddf 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -9,6 +9,7 @@ add_flang_library(FIRTransforms CompilerGeneratedNames.cpp ConstantArgumentGlobalisation.cpp ControlFlowConverter.cpp + CUFCommon.cpp CUFAddConstructor.cpp CUFDeviceGlobal.cpp CUFOpConversion.cpp diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp index f260437e7104171..4da06be8ef7dd95 100644 --- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp +++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp @@ -11,6 +11,7 @@ #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" +#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/entry-names.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -24,8 +25,6 @@ 
namespace fir { namespace { -static constexpr llvm::StringRef cudaModName{"cuda_device_mod"}; - static constexpr llvm::StringRef cudaFortranCtorName{ "__cudaFortranConstructor"}; @@ -60,7 +59,7 @@ struct CUFAddConstructor builder.create(loc, funcTy, cufRegisterAllocatorRef); // Register kernels - auto gpuMod = symTab.lookup(cudaModName); + auto gpuMod = symTab.lookup(cudaDeviceModuleName); if (gpuMod) { auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(ctx); auto registeredMod = builder.create( @@ -68,7 +67,7 @@ struct CUFAddConstructor for (auto func : gpuMod.getOps()) { if (func.isKernel()) { auto kernelName = mlir::SymbolRefAttr::get( - builder.getStringAttr(cudaModName), + builder.getStringAttr(cudaDeviceModuleName), {mlir::SymbolRefAttr::get(builder.getContext(), func.getName())}); builder.create(loc, kernelName, registeredMod); } diff --git a/flang/lib/Optimizer/Transforms/CUFCommon.cpp b/flang/lib/Optimizer/Transforms/CUFCommon.cpp new file mode 100644 index 000000000000000..5eca86529f9e17c --- /dev/null +++ b/flang/lib/Optimizer/Transforms/CUFCommon.cpp @@ -0,0 +1,31 @@ +//===-- CUFCommon.cpp - Shared functions between passes ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Transforms/CUFCommon.h" +#include "mlir/Dialect/LLVMIR/NVVMDialect.h" + +/// Retrieve or create the CUDA Fortran GPU module in the given \p mod. 
+mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod, + mlir::SymbolTable &symTab) { + if (auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName)) + return gpuMod; + + auto *ctx = mod.getContext(); + mod->setAttr(mlir::gpu::GPUDialect::getContainerModuleAttrName(), + mlir::UnitAttr::get(ctx)); + + mlir::OpBuilder builder(ctx); + auto gpuMod = builder.create<mlir::gpu::GPUModuleOp>(mod.getLoc(), + cudaDeviceModuleName); + llvm::SmallVector<mlir::Attribute> targets; + targets.push_back(mlir::NVVM::NVVMTargetAttr::get(ctx)); + gpuMod.setTargetsAttr(builder.getArrayAttr(targets)); + mlir::Block::iterator insertPt(mod.getBodyRegion().front().end()); + symTab.insert(gpuMod, insertPt); + return gpuMod; +} From 75c1c2614aea23647871dcfa161cf0fad3b19312 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Wed, 25 Sep 2024 08:20:05 -0700 Subject: [PATCH 068/425] [llvm][TLI] Sort a switch's cases. NFC --- .../include/llvm/Analysis/TargetLibraryInfo.h | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 9e543b844ad768f..aeb8de3973f7321 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -408,35 +408,35 @@ class TargetLibraryInfo { switch (F) { default: break; // clang-format off - case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: - case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: - case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl: - case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: - case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl: - case LibFunc_asin: case LibFunc_asinf: case LibFunc_asinl: case LibFunc_acos: case LibFunc_acosf: case LibFunc_acosl: + case LibFunc_asin: case LibFunc_asinf: case LibFunc_asinl: case LibFunc_atan: case LibFunc_atanf: case LibFunc_atanl: - case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinhl: + case LibFunc_ceil: case 
LibFunc_ceilf: case LibFunc_ceill: + case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: + case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: case LibFunc_cosh: case LibFunc_coshf: case LibFunc_coshl: - case LibFunc_tanh: case LibFunc_tanhf: case LibFunc_tanhl: - case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: - case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite: - case LibFunc_sqrtl_finite: + case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l: + case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: + case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: case LibFunc_fmin: case LibFunc_fminf: case LibFunc_fminl: - case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: + case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: + case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l: + case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp: + case LibFunc_memcpy: case LibFunc_memset: case LibFunc_memmove: case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_nearbyintl: - case LibFunc_ceil: case LibFunc_ceilf: case LibFunc_ceill: case LibFunc_rint: case LibFunc_rintf: case LibFunc_rintl: case LibFunc_round: case LibFunc_roundf: case LibFunc_roundl: - case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: - case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l: - case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l: - case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: - case LibFunc_memcpy: case LibFunc_memset: case LibFunc_memmove: - case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp: + case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl: + case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinhl: + case LibFunc_sqrt: case LibFunc_sqrtf: case LibFunc_sqrtl: + case LibFunc_sqrt_finite: case LibFunc_sqrtf_finite: + case LibFunc_sqrtl_finite: case LibFunc_strcpy: case LibFunc_stpcpy: case 
LibFunc_strlen: case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy: + case LibFunc_tan: case LibFunc_tanf: case LibFunc_tanl: + case LibFunc_tanh: case LibFunc_tanhf: case LibFunc_tanhl: + case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl: // clang-format on return true; } From 054c23d78f2e1aba1dd353aec19e00993ca452fa Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 25 Oct 2024 16:22:24 -0700 Subject: [PATCH 069/425] X86: Improve cost model of fp16 conversion (#113195) Improve cost-modeling for x86 __fp16 conversions so the SLPVectorizer transforms the patterns: - Override `X86TTIImpl::getStoreMinimumVF` to report a minimum VF of 4 (an SSE register can hold 4xfloat converted/stored to 4xf16; this is necessary because fp16 stores are neither modeled as trunc-stores, nor can we mark direct Xxfp16 stores as legal, since we generally expand fp16 operations). - Add missing cost entries to `X86TTIImpl::getCastInstrCost` for conversions from/to fp16. Note that conversion from f64 to f16 is not supported by an X86 instruction. 
--- .../lib/Target/X86/X86TargetTransformInfo.cpp | 41 ++ llvm/lib/Target/X86/X86TargetTransformInfo.h | 3 + .../SLPVectorizer/X86/conversion-fp16.ll | 606 ++++++++++++++++++ 3 files changed, 650 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 413ef0136d5c06f..bae223243b3dc98 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2296,7 +2296,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, { 1, 1, 1, 1 } }, { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, { 3, 1, 1, 1 } }, { ISD::FP_EXTEND, MVT::v16f64, MVT::v16f32, { 4, 1, 1, 1 } }, // 2*vcvtps2pd+vextractf64x4 + { ISD::FP_EXTEND, MVT::v16f32, MVT::v16f16, { 1, 1, 1, 1 } }, // vcvtph2ps + { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, { 1, 1, 1, 1 } }, + { ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, { 1, 1, 1, 1 } }, // vcvtps2ph { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd @@ -2973,6 +2976,17 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v4i32, MVT::v2i64, { 1, 1, 1, 1 } }, // PSHUFD }; + static const TypeConversionCostKindTblEntry F16ConversionTbl[] = { + { ISD::FP_ROUND, MVT::f16, MVT::f32, { 1, 1, 1, 1 } }, + { ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, { 1, 1, 1, 1 } }, + { ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, { 1, 1, 1, 1 } }, + { ISD::FP_EXTEND, MVT::f32, MVT::f16, { 1, 1, 1, 1 } }, + { ISD::FP_EXTEND, MVT::f64, MVT::f16, { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd + { ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, { 1, 1, 1, 1 } }, + { ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, { 1, 1, 1, 1 } }, + { 
ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd + }; + // Attempt to map directly to (simple) MVT types to let us match custom entries. EVT SrcTy = TLI->getValueType(DL, Src); EVT DstTy = TLI->getValueType(DL, Dst); @@ -3034,6 +3048,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, return *KindCost; } + if (ST->hasF16C()) { + if (const auto *Entry = ConvertCostTableLookup(F16ConversionTbl, ISD, + SimpleDstTy, SimpleSrcTy)) + if (auto KindCost = Entry->Cost[CostKind]) + return *KindCost; + } + if (ST->hasSSE41()) { if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) @@ -3107,6 +3128,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (auto KindCost = Entry->Cost[CostKind]) return std::max(LTSrc.first, LTDest.first) * *KindCost; + if (ST->hasF16C()) { + if (const auto *Entry = ConvertCostTableLookup(F16ConversionTbl, ISD, + LTDest.second, LTSrc.second)) + if (auto KindCost = Entry->Cost[CostKind]) + return std::max(LTSrc.first, LTDest.first) * *KindCost; + } + if (ST->hasSSE41()) if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, LTDest.second, LTSrc.second)) @@ -3146,6 +3174,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, TTI::CastContextHint::None, CostKind); } + if (ISD == ISD::FP_ROUND && LTDest.second.getScalarType() == MVT::f16) { + // Conversion requires a libcall. + return InstructionCost::getInvalid(); + } + // TODO: Allow non-throughput costs that aren't binary. 
auto AdjustCost = [&CostKind](InstructionCost Cost, InstructionCost N = 1) -> InstructionCost { @@ -6923,6 +6956,14 @@ bool X86TTIImpl::isVectorShiftByScalarCheap(Type *Ty) const { return true; } +unsigned X86TTIImpl::getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, + Type *ScalarValTy) const { + if (ST->hasF16C() && ScalarMemTy->isHalfTy()) { + return 4; + } + return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); +} + bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I, SmallVectorImpl &Ops) const { using namespace llvm::PatternMatch; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 0100f328ab4bd30..36d00cee0d18b57 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -302,6 +302,9 @@ class X86TTIImpl : public BasicTTIImplBase { bool isVectorShiftByScalarCheap(Type *Ty) const; + unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, + Type *ScalarValTy) const; + private: bool supportsGather() const; InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind, diff --git a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll new file mode 100644 index 000000000000000..bcea147d724f53b --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll @@ -0,0 +1,606 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx2 | FileCheck %s --check-prefix=CHECK +; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx2 -mattr=+f16c | FileCheck %s --check-prefix=CHECK-F16C +; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-AVX512 + +define void @fpext_v4xf16_v4xf32(ptr %s0, ptr %d0) { +; CHECK-LABEL: define void @fpext_v4xf16_v4xf32( +; CHECK-SAME: 
ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1 +; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2 +; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3 +; CHECK-NEXT: [[L0:%.*]] = load half, ptr [[S0]], align 2 +; CHECK-NEXT: [[L1:%.*]] = load half, ptr [[S1]], align 2 +; CHECK-NEXT: [[L2:%.*]] = load half, ptr [[S2]], align 2 +; CHECK-NEXT: [[L3:%.*]] = load half, ptr [[S3]], align 2 +; CHECK-NEXT: [[E0:%.*]] = fpext half [[L0]] to float +; CHECK-NEXT: [[E1:%.*]] = fpext half [[L1]] to float +; CHECK-NEXT: [[E2:%.*]] = fpext half [[L2]] to float +; CHECK-NEXT: [[E3:%.*]] = fpext half [[L3]] to float +; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 1 +; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 2 +; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 3 +; CHECK-NEXT: store float [[E0]], ptr [[D0]], align 8 +; CHECK-NEXT: store float [[E1]], ptr [[D1]], align 8 +; CHECK-NEXT: store float [[E2]], ptr [[D2]], align 8 +; CHECK-NEXT: store float [[E3]], ptr [[D3]], align 8 +; CHECK-NEXT: ret void +; +; CHECK-F16C-LABEL: define void @fpext_v4xf16_v4xf32( +; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2 +; CHECK-F16C-NEXT: [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x float> +; CHECK-F16C-NEXT: store <4 x float> [[TMP2]], ptr [[D0]], align 8 +; CHECK-F16C-NEXT: ret void +; +; CHECK-AVX512-LABEL: define void @fpext_v4xf16_v4xf32( +; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2 +; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x float> +; CHECK-AVX512-NEXT: store <4 x float> [[TMP2]], ptr [[D0]], align 8 +; CHECK-AVX512-NEXT: ret void +; + %s1 = getelementptr 
inbounds half, ptr %s0, i64 1 + %s2 = getelementptr inbounds half, ptr %s0, i64 2 + %s3 = getelementptr inbounds half, ptr %s0, i64 3 + %l0 = load half, ptr %s0, align 2 + %l1 = load half, ptr %s1, align 2 + %l2 = load half, ptr %s2, align 2 + %l3 = load half, ptr %s3, align 2 + + %e0 = fpext half %l0 to float + %e1 = fpext half %l1 to float + %e2 = fpext half %l2 to float + %e3 = fpext half %l3 to float + + %d1 = getelementptr inbounds float, ptr %d0, i64 1 + %d2 = getelementptr inbounds float, ptr %d0, i64 2 + %d3 = getelementptr inbounds float, ptr %d0, i64 3 + store float %e0, ptr %d0, align 8 + store float %e1, ptr %d1, align 8 + store float %e2, ptr %d2, align 8 + store float %e3, ptr %d3, align 8 + ret void +} + +define void @fpext_v4xf16_v4xf64(ptr %s0, ptr %d0) { +; CHECK-LABEL: define void @fpext_v4xf16_v4xf64( +; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1 +; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2 +; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3 +; CHECK-NEXT: [[L0:%.*]] = load half, ptr [[S0]], align 2 +; CHECK-NEXT: [[L1:%.*]] = load half, ptr [[S1]], align 2 +; CHECK-NEXT: [[L2:%.*]] = load half, ptr [[S2]], align 2 +; CHECK-NEXT: [[L3:%.*]] = load half, ptr [[S3]], align 2 +; CHECK-NEXT: [[E0:%.*]] = fpext half [[L0]] to double +; CHECK-NEXT: [[E1:%.*]] = fpext half [[L1]] to double +; CHECK-NEXT: [[E2:%.*]] = fpext half [[L2]] to double +; CHECK-NEXT: [[E3:%.*]] = fpext half [[L3]] to double +; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 1 +; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 2 +; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 3 +; CHECK-NEXT: store double [[E0]], ptr [[D0]], align 8 +; CHECK-NEXT: store double [[E1]], ptr [[D1]], align 8 +; CHECK-NEXT: store double [[E2]], ptr [[D2]], align 8 +; CHECK-NEXT: store 
double [[E3]], ptr [[D3]], align 8 +; CHECK-NEXT: ret void +; +; CHECK-F16C-LABEL: define void @fpext_v4xf16_v4xf64( +; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2 +; CHECK-F16C-NEXT: [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x double> +; CHECK-F16C-NEXT: store <4 x double> [[TMP2]], ptr [[D0]], align 8 +; CHECK-F16C-NEXT: ret void +; +; CHECK-AVX512-LABEL: define void @fpext_v4xf16_v4xf64( +; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2 +; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x double> +; CHECK-AVX512-NEXT: store <4 x double> [[TMP2]], ptr [[D0]], align 8 +; CHECK-AVX512-NEXT: ret void +; + %s1 = getelementptr inbounds half, ptr %s0, i64 1 + %s2 = getelementptr inbounds half, ptr %s0, i64 2 + %s3 = getelementptr inbounds half, ptr %s0, i64 3 + %l0 = load half, ptr %s0, align 2 + %l1 = load half, ptr %s1, align 2 + %l2 = load half, ptr %s2, align 2 + %l3 = load half, ptr %s3, align 2 + + %e0 = fpext half %l0 to double + %e1 = fpext half %l1 to double + %e2 = fpext half %l2 to double + %e3 = fpext half %l3 to double + + %d1 = getelementptr inbounds double, ptr %d0, i64 1 + %d2 = getelementptr inbounds double, ptr %d0, i64 2 + %d3 = getelementptr inbounds double, ptr %d0, i64 3 + store double %e0, ptr %d0, align 8 + store double %e1, ptr %d1, align 8 + store double %e2, ptr %d2, align 8 + store double %e3, ptr %d3, align 8 + ret void +} + +define void @fpext_v16xf16_v16xf32(ptr %s0, ptr %d0) { +; CHECK-LABEL: define void @fpext_v16xf16_v16xf32( +; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1 +; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2 +; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3 +; CHECK-NEXT: [[S4:%.*]] = 
getelementptr inbounds half, ptr [[S0]], i64 4 +; CHECK-NEXT: [[S5:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 5 +; CHECK-NEXT: [[S6:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 6 +; CHECK-NEXT: [[S7:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 7 +; CHECK-NEXT: [[S8:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 8 +; CHECK-NEXT: [[S9:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 9 +; CHECK-NEXT: [[S10:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 10 +; CHECK-NEXT: [[S11:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 11 +; CHECK-NEXT: [[S12:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 12 +; CHECK-NEXT: [[S13:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 13 +; CHECK-NEXT: [[S14:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 14 +; CHECK-NEXT: [[S15:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 15 +; CHECK-NEXT: [[L0:%.*]] = load half, ptr [[S0]], align 2 +; CHECK-NEXT: [[L1:%.*]] = load half, ptr [[S1]], align 2 +; CHECK-NEXT: [[L2:%.*]] = load half, ptr [[S2]], align 2 +; CHECK-NEXT: [[L3:%.*]] = load half, ptr [[S3]], align 2 +; CHECK-NEXT: [[L4:%.*]] = load half, ptr [[S4]], align 2 +; CHECK-NEXT: [[L5:%.*]] = load half, ptr [[S5]], align 2 +; CHECK-NEXT: [[L6:%.*]] = load half, ptr [[S6]], align 2 +; CHECK-NEXT: [[L7:%.*]] = load half, ptr [[S7]], align 2 +; CHECK-NEXT: [[L8:%.*]] = load half, ptr [[S8]], align 2 +; CHECK-NEXT: [[L9:%.*]] = load half, ptr [[S9]], align 2 +; CHECK-NEXT: [[L10:%.*]] = load half, ptr [[S10]], align 2 +; CHECK-NEXT: [[L11:%.*]] = load half, ptr [[S11]], align 2 +; CHECK-NEXT: [[L12:%.*]] = load half, ptr [[S12]], align 2 +; CHECK-NEXT: [[L13:%.*]] = load half, ptr [[S13]], align 2 +; CHECK-NEXT: [[L14:%.*]] = load half, ptr [[S14]], align 2 +; CHECK-NEXT: [[L15:%.*]] = load half, ptr [[S15]], align 2 +; CHECK-NEXT: [[E0:%.*]] = fpext half [[L0]] to float +; CHECK-NEXT: [[E1:%.*]] = fpext half [[L1]] to float +; CHECK-NEXT: [[E2:%.*]] = fpext half [[L2]] to 
float +; CHECK-NEXT: [[E3:%.*]] = fpext half [[L3]] to float +; CHECK-NEXT: [[E4:%.*]] = fpext half [[L4]] to float +; CHECK-NEXT: [[E5:%.*]] = fpext half [[L5]] to float +; CHECK-NEXT: [[E6:%.*]] = fpext half [[L6]] to float +; CHECK-NEXT: [[E7:%.*]] = fpext half [[L7]] to float +; CHECK-NEXT: [[E8:%.*]] = fpext half [[L8]] to float +; CHECK-NEXT: [[E9:%.*]] = fpext half [[L9]] to float +; CHECK-NEXT: [[E10:%.*]] = fpext half [[L10]] to float +; CHECK-NEXT: [[E11:%.*]] = fpext half [[L11]] to float +; CHECK-NEXT: [[E12:%.*]] = fpext half [[L12]] to float +; CHECK-NEXT: [[E13:%.*]] = fpext half [[L13]] to float +; CHECK-NEXT: [[E14:%.*]] = fpext half [[L14]] to float +; CHECK-NEXT: [[E15:%.*]] = fpext half [[L15]] to float +; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 1 +; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 2 +; CHECK-NEXT: [[D15:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 3 +; CHECK-NEXT: [[D4:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 4 +; CHECK-NEXT: [[D5:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 5 +; CHECK-NEXT: [[D6:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 6 +; CHECK-NEXT: [[D7:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 7 +; CHECK-NEXT: [[D8:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 8 +; CHECK-NEXT: [[D9:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 9 +; CHECK-NEXT: [[D10:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 10 +; CHECK-NEXT: [[D11:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 11 +; CHECK-NEXT: [[D12:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 12 +; CHECK-NEXT: [[D13:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 13 +; CHECK-NEXT: [[D14:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 14 +; CHECK-NEXT: [[D16:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 15 +; CHECK-NEXT: store float [[E0]], ptr [[D0]], align 8 +; CHECK-NEXT: store float [[E1]], ptr [[D1]], align 8 +; 
CHECK-NEXT: store float [[E2]], ptr [[D2]], align 8 +; CHECK-NEXT: store float [[E3]], ptr [[D15]], align 8 +; CHECK-NEXT: store float [[E4]], ptr [[D4]], align 8 +; CHECK-NEXT: store float [[E5]], ptr [[D5]], align 8 +; CHECK-NEXT: store float [[E6]], ptr [[D6]], align 8 +; CHECK-NEXT: store float [[E7]], ptr [[D7]], align 8 +; CHECK-NEXT: store float [[E8]], ptr [[D8]], align 8 +; CHECK-NEXT: store float [[E9]], ptr [[D9]], align 8 +; CHECK-NEXT: store float [[E10]], ptr [[D10]], align 8 +; CHECK-NEXT: store float [[E11]], ptr [[D11]], align 8 +; CHECK-NEXT: store float [[E12]], ptr [[D12]], align 8 +; CHECK-NEXT: store float [[E13]], ptr [[D13]], align 8 +; CHECK-NEXT: store float [[E14]], ptr [[D14]], align 8 +; CHECK-NEXT: store float [[E15]], ptr [[D16]], align 8 +; CHECK-NEXT: ret void +; +; CHECK-F16C-LABEL: define void @fpext_v16xf16_v16xf32( +; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 8 +; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 8 +; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[S0]], align 2 +; CHECK-F16C-NEXT: [[TMP2:%.*]] = fpext <8 x half> [[TMP1]] to <8 x float> +; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x half>, ptr [[S8]], align 2 +; CHECK-F16C-NEXT: [[TMP4:%.*]] = fpext <8 x half> [[TMP3]] to <8 x float> +; CHECK-F16C-NEXT: store <8 x float> [[TMP2]], ptr [[D0]], align 8 +; CHECK-F16C-NEXT: store <8 x float> [[TMP4]], ptr [[D8]], align 8 +; CHECK-F16C-NEXT: ret void +; +; CHECK-AVX512-LABEL: define void @fpext_v16xf16_v16xf32( +; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <16 x half>, ptr [[S0]], align 2 +; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fpext <16 x half> [[TMP1]] to <16 x float> +; CHECK-AVX512-NEXT: store <16 x float> [[TMP2]], ptr [[D0]], align 8 +; CHECK-AVX512-NEXT: ret void +; + %s1 = getelementptr inbounds half, ptr %s0, i64 1 + %s2 = 
getelementptr inbounds half, ptr %s0, i64 2 + %s3 = getelementptr inbounds half, ptr %s0, i64 3 + %s4 = getelementptr inbounds half, ptr %s0, i64 4 + %s5 = getelementptr inbounds half, ptr %s0, i64 5 + %s6 = getelementptr inbounds half, ptr %s0, i64 6 + %s7 = getelementptr inbounds half, ptr %s0, i64 7 + %s8 = getelementptr inbounds half, ptr %s0, i64 8 + %s9 = getelementptr inbounds half, ptr %s0, i64 9 + %s10 = getelementptr inbounds half, ptr %s0, i64 10 + %s11 = getelementptr inbounds half, ptr %s0, i64 11 + %s12 = getelementptr inbounds half, ptr %s0, i64 12 + %s13 = getelementptr inbounds half, ptr %s0, i64 13 + %s14 = getelementptr inbounds half, ptr %s0, i64 14 + %s15 = getelementptr inbounds half, ptr %s0, i64 15 + %l0 = load half, ptr %s0, align 2 + %l1 = load half, ptr %s1, align 2 + %l2 = load half, ptr %s2, align 2 + %l3 = load half, ptr %s3, align 2 + %l4 = load half, ptr %s4, align 2 + %l5 = load half, ptr %s5, align 2 + %l6 = load half, ptr %s6, align 2 + %l7 = load half, ptr %s7, align 2 + %l8 = load half, ptr %s8, align 2 + %l9 = load half, ptr %s9, align 2 + %l10 = load half, ptr %s10, align 2 + %l11 = load half, ptr %s11, align 2 + %l12 = load half, ptr %s12, align 2 + %l13 = load half, ptr %s13, align 2 + %l14 = load half, ptr %s14, align 2 + %l15 = load half, ptr %s15, align 2 + + %e0 = fpext half %l0 to float + %e1 = fpext half %l1 to float + %e2 = fpext half %l2 to float + %e3 = fpext half %l3 to float + %e4 = fpext half %l4 to float + %e5 = fpext half %l5 to float + %e6 = fpext half %l6 to float + %e7 = fpext half %l7 to float + %e8 = fpext half %l8 to float + %e9 = fpext half %l9 to float + %e10 = fpext half %l10 to float + %e11 = fpext half %l11 to float + %e12 = fpext half %l12 to float + %e13 = fpext half %l13 to float + %e14 = fpext half %l14 to float + %e15 = fpext half %l15 to float + + %d1 = getelementptr inbounds float, ptr %d0, i64 1 + %d2 = getelementptr inbounds float, ptr %d0, i64 2 + %d3 = getelementptr inbounds float, ptr 
%d0, i64 3 + %d4 = getelementptr inbounds float, ptr %d0, i64 4 + %d5 = getelementptr inbounds float, ptr %d0, i64 5 + %d6 = getelementptr inbounds float, ptr %d0, i64 6 + %d7 = getelementptr inbounds float, ptr %d0, i64 7 + %d8 = getelementptr inbounds float, ptr %d0, i64 8 + %d9 = getelementptr inbounds float, ptr %d0, i64 9 + %d10 = getelementptr inbounds float, ptr %d0, i64 10 + %d11 = getelementptr inbounds float, ptr %d0, i64 11 + %d12 = getelementptr inbounds float, ptr %d0, i64 12 + %d13 = getelementptr inbounds float, ptr %d0, i64 13 + %d14 = getelementptr inbounds float, ptr %d0, i64 14 + %d15 = getelementptr inbounds float, ptr %d0, i64 15 + store float %e0, ptr %d0, align 8 + store float %e1, ptr %d1, align 8 + store float %e2, ptr %d2, align 8 + store float %e3, ptr %d3, align 8 + store float %e4, ptr %d4, align 8 + store float %e5, ptr %d5, align 8 + store float %e6, ptr %d6, align 8 + store float %e7, ptr %d7, align 8 + store float %e8, ptr %d8, align 8 + store float %e9, ptr %d9, align 8 + store float %e10, ptr %d10, align 8 + store float %e11, ptr %d11, align 8 + store float %e12, ptr %d12, align 8 + store float %e13, ptr %d13, align 8 + store float %e14, ptr %d14, align 8 + store float %e15, ptr %d15, align 8 + ret void +} + +define void @fpround_v4xf32_v4xf16(ptr %s0, ptr %d0) { +; CHECK-LABEL: define void @fpround_v4xf32_v4xf16( +; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 1 +; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 2 +; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 3 +; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[S0]], align 4 +; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[S1]], align 4 +; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[S2]], align 4 +; CHECK-NEXT: [[L3:%.*]] = load float, ptr [[S3]], align 4 +; CHECK-NEXT: [[T0:%.*]] = fptrunc float [[L0]] to half +; CHECK-NEXT: [[T1:%.*]] = fptrunc float 
[[L1]] to half +; CHECK-NEXT: [[T2:%.*]] = fptrunc float [[L2]] to half +; CHECK-NEXT: [[T3:%.*]] = fptrunc float [[L3]] to half +; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 +; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 2 +; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 3 +; CHECK-NEXT: store half [[T0]], ptr [[D0]], align 2 +; CHECK-NEXT: store half [[T1]], ptr [[D1]], align 2 +; CHECK-NEXT: store half [[T2]], ptr [[D2]], align 2 +; CHECK-NEXT: store half [[T3]], ptr [[D3]], align 2 +; CHECK-NEXT: ret void +; +; CHECK-F16C-LABEL: define void @fpround_v4xf32_v4xf16( +; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[S0]], align 4 +; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <4 x float> [[TMP1]] to <4 x half> +; CHECK-F16C-NEXT: store <4 x half> [[TMP2]], ptr [[D0]], align 2 +; CHECK-F16C-NEXT: ret void +; +; CHECK-AVX512-LABEL: define void @fpround_v4xf32_v4xf16( +; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[S0]], align 4 +; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fptrunc <4 x float> [[TMP1]] to <4 x half> +; CHECK-AVX512-NEXT: store <4 x half> [[TMP2]], ptr [[D0]], align 2 +; CHECK-AVX512-NEXT: ret void +; + %s1 = getelementptr inbounds float, ptr %s0, i64 1 + %s2 = getelementptr inbounds float, ptr %s0, i64 2 + %s3 = getelementptr inbounds float, ptr %s0, i64 3 + %l0 = load float, ptr %s0, align 4 + %l1 = load float, ptr %s1, align 4 + %l2 = load float, ptr %s2, align 4 + %l3 = load float, ptr %s3, align 4 + + %t0 = fptrunc float %l0 to half + %t1 = fptrunc float %l1 to half + %t2 = fptrunc float %l2 to half + %t3 = fptrunc float %l3 to half + + %d1 = getelementptr inbounds half, ptr %d0, i64 1 + %d2 = getelementptr inbounds half, ptr %d0, i64 2 + %d3 = getelementptr inbounds half, ptr %d0, i64 3 + store half %t0, ptr %d0, align 2 + store 
half %t1, ptr %d1, align 2 + store half %t2, ptr %d2, align 2 + store half %t3, ptr %d3, align 2 + ret void +} + +define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) { +; CHECK-LABEL: define void @fpround_v16xf32_v16xf16( +; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 1 +; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 2 +; CHECK-NEXT: [[S3:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 3 +; CHECK-NEXT: [[S4:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 4 +; CHECK-NEXT: [[S5:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 5 +; CHECK-NEXT: [[S6:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 6 +; CHECK-NEXT: [[S7:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 7 +; CHECK-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8 +; CHECK-NEXT: [[S9:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 9 +; CHECK-NEXT: [[S10:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 10 +; CHECK-NEXT: [[S11:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 11 +; CHECK-NEXT: [[S12:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 12 +; CHECK-NEXT: [[S13:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 13 +; CHECK-NEXT: [[S14:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 14 +; CHECK-NEXT: [[S15:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 15 +; CHECK-NEXT: [[L0:%.*]] = load float, ptr [[S0]], align 4 +; CHECK-NEXT: [[L1:%.*]] = load float, ptr [[S1]], align 4 +; CHECK-NEXT: [[L2:%.*]] = load float, ptr [[S2]], align 4 +; CHECK-NEXT: [[L3:%.*]] = load float, ptr [[S3]], align 4 +; CHECK-NEXT: [[L4:%.*]] = load float, ptr [[S4]], align 4 +; CHECK-NEXT: [[L5:%.*]] = load float, ptr [[S5]], align 4 +; CHECK-NEXT: [[L6:%.*]] = load float, ptr [[S6]], align 4 +; CHECK-NEXT: [[L7:%.*]] = load float, ptr [[S7]], align 4 +; CHECK-NEXT: [[L8:%.*]] = load float, ptr [[S8]], align 4 +; CHECK-NEXT: 
[[L9:%.*]] = load float, ptr [[S9]], align 4 +; CHECK-NEXT: [[L10:%.*]] = load float, ptr [[S10]], align 4 +; CHECK-NEXT: [[L11:%.*]] = load float, ptr [[S11]], align 4 +; CHECK-NEXT: [[L12:%.*]] = load float, ptr [[S12]], align 4 +; CHECK-NEXT: [[L13:%.*]] = load float, ptr [[S13]], align 4 +; CHECK-NEXT: [[L14:%.*]] = load float, ptr [[S14]], align 4 +; CHECK-NEXT: [[L15:%.*]] = load float, ptr [[S15]], align 4 +; CHECK-NEXT: [[T0:%.*]] = fptrunc float [[L0]] to half +; CHECK-NEXT: [[T1:%.*]] = fptrunc float [[L1]] to half +; CHECK-NEXT: [[T2:%.*]] = fptrunc float [[L2]] to half +; CHECK-NEXT: [[T3:%.*]] = fptrunc float [[L3]] to half +; CHECK-NEXT: [[T4:%.*]] = fptrunc float [[L4]] to half +; CHECK-NEXT: [[T5:%.*]] = fptrunc float [[L5]] to half +; CHECK-NEXT: [[T6:%.*]] = fptrunc float [[L6]] to half +; CHECK-NEXT: [[T7:%.*]] = fptrunc float [[L7]] to half +; CHECK-NEXT: [[T8:%.*]] = fptrunc float [[L8]] to half +; CHECK-NEXT: [[T9:%.*]] = fptrunc float [[L9]] to half +; CHECK-NEXT: [[T10:%.*]] = fptrunc float [[L10]] to half +; CHECK-NEXT: [[T11:%.*]] = fptrunc float [[L11]] to half +; CHECK-NEXT: [[T12:%.*]] = fptrunc float [[L12]] to half +; CHECK-NEXT: [[T13:%.*]] = fptrunc float [[L13]] to half +; CHECK-NEXT: [[T14:%.*]] = fptrunc float [[L14]] to half +; CHECK-NEXT: [[T15:%.*]] = fptrunc float [[L15]] to half +; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 +; CHECK-NEXT: [[D2:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 2 +; CHECK-NEXT: [[D3:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 3 +; CHECK-NEXT: [[D4:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 4 +; CHECK-NEXT: [[D5:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 5 +; CHECK-NEXT: [[D6:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 6 +; CHECK-NEXT: [[D7:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 7 +; CHECK-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8 +; CHECK-NEXT: [[D9:%.*]] = getelementptr inbounds half, 
ptr [[D0]], i64 9 +; CHECK-NEXT: [[D10:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 10 +; CHECK-NEXT: [[D11:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 11 +; CHECK-NEXT: [[D12:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 12 +; CHECK-NEXT: [[D13:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 13 +; CHECK-NEXT: [[D14:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 14 +; CHECK-NEXT: [[D15:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 15 +; CHECK-NEXT: store half [[T0]], ptr [[D0]], align 2 +; CHECK-NEXT: store half [[T1]], ptr [[D1]], align 2 +; CHECK-NEXT: store half [[T2]], ptr [[D2]], align 2 +; CHECK-NEXT: store half [[T3]], ptr [[D3]], align 2 +; CHECK-NEXT: store half [[T4]], ptr [[D4]], align 2 +; CHECK-NEXT: store half [[T5]], ptr [[D5]], align 2 +; CHECK-NEXT: store half [[T6]], ptr [[D6]], align 2 +; CHECK-NEXT: store half [[T7]], ptr [[D7]], align 2 +; CHECK-NEXT: store half [[T8]], ptr [[D8]], align 2 +; CHECK-NEXT: store half [[T9]], ptr [[D9]], align 2 +; CHECK-NEXT: store half [[T10]], ptr [[D10]], align 2 +; CHECK-NEXT: store half [[T11]], ptr [[D11]], align 2 +; CHECK-NEXT: store half [[T12]], ptr [[D12]], align 2 +; CHECK-NEXT: store half [[T13]], ptr [[D13]], align 2 +; CHECK-NEXT: store half [[T14]], ptr [[D14]], align 2 +; CHECK-NEXT: store half [[T15]], ptr [[D15]], align 2 +; CHECK-NEXT: ret void +; +; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16( +; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8 +; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8 +; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[S0]], align 4 +; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <8 x float> [[TMP1]] to <8 x half> +; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[S8]], align 4 +; CHECK-F16C-NEXT: [[TMP4:%.*]] = fptrunc <8 x float> [[TMP3]] to <8 x half> +; CHECK-F16C-NEXT: store <8 
x half> [[TMP2]], ptr [[D0]], align 2 +; CHECK-F16C-NEXT: store <8 x half> [[TMP4]], ptr [[D8]], align 2 +; CHECK-F16C-NEXT: ret void +; +; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16( +; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4 +; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half> +; CHECK-AVX512-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2 +; CHECK-AVX512-NEXT: ret void +; + %s1 = getelementptr inbounds float, ptr %s0, i64 1 + %s2 = getelementptr inbounds float, ptr %s0, i64 2 + %s3 = getelementptr inbounds float, ptr %s0, i64 3 + %s4 = getelementptr inbounds float, ptr %s0, i64 4 + %s5 = getelementptr inbounds float, ptr %s0, i64 5 + %s6 = getelementptr inbounds float, ptr %s0, i64 6 + %s7 = getelementptr inbounds float, ptr %s0, i64 7 + %s8 = getelementptr inbounds float, ptr %s0, i64 8 + %s9 = getelementptr inbounds float, ptr %s0, i64 9 + %s10 = getelementptr inbounds float, ptr %s0, i64 10 + %s11 = getelementptr inbounds float, ptr %s0, i64 11 + %s12 = getelementptr inbounds float, ptr %s0, i64 12 + %s13 = getelementptr inbounds float, ptr %s0, i64 13 + %s14 = getelementptr inbounds float, ptr %s0, i64 14 + %s15 = getelementptr inbounds float, ptr %s0, i64 15 + %l0 = load float, ptr %s0, align 4 + %l1 = load float, ptr %s1, align 4 + %l2 = load float, ptr %s2, align 4 + %l3 = load float, ptr %s3, align 4 + %l4 = load float, ptr %s4, align 4 + %l5 = load float, ptr %s5, align 4 + %l6 = load float, ptr %s6, align 4 + %l7 = load float, ptr %s7, align 4 + %l8 = load float, ptr %s8, align 4 + %l9 = load float, ptr %s9, align 4 + %l10 = load float, ptr %s10, align 4 + %l11 = load float, ptr %s11, align 4 + %l12 = load float, ptr %s12, align 4 + %l13 = load float, ptr %s13, align 4 + %l14 = load float, ptr %s14, align 4 + %l15 = load float, ptr %s15, align 4 + + %t0 = fptrunc float %l0 to half + %t1 = fptrunc float %l1 
to half + %t2 = fptrunc float %l2 to half + %t3 = fptrunc float %l3 to half + %t4 = fptrunc float %l4 to half + %t5 = fptrunc float %l5 to half + %t6 = fptrunc float %l6 to half + %t7 = fptrunc float %l7 to half + %t8 = fptrunc float %l8 to half + %t9 = fptrunc float %l9 to half + %t10 = fptrunc float %l10 to half + %t11 = fptrunc float %l11 to half + %t12 = fptrunc float %l12 to half + %t13 = fptrunc float %l13 to half + %t14 = fptrunc float %l14 to half + %t15 = fptrunc float %l15 to half + + %d1 = getelementptr inbounds half, ptr %d0, i64 1 + %d2 = getelementptr inbounds half, ptr %d0, i64 2 + %d3 = getelementptr inbounds half, ptr %d0, i64 3 + %d4 = getelementptr inbounds half, ptr %d0, i64 4 + %d5 = getelementptr inbounds half, ptr %d0, i64 5 + %d6 = getelementptr inbounds half, ptr %d0, i64 6 + %d7 = getelementptr inbounds half, ptr %d0, i64 7 + %d8 = getelementptr inbounds half, ptr %d0, i64 8 + %d9 = getelementptr inbounds half, ptr %d0, i64 9 + %d10 = getelementptr inbounds half, ptr %d0, i64 10 + %d11 = getelementptr inbounds half, ptr %d0, i64 11 + %d12 = getelementptr inbounds half, ptr %d0, i64 12 + %d13 = getelementptr inbounds half, ptr %d0, i64 13 + %d14 = getelementptr inbounds half, ptr %d0, i64 14 + %d15 = getelementptr inbounds half, ptr %d0, i64 15 + store half %t0, ptr %d0, align 2 + store half %t1, ptr %d1, align 2 + store half %t2, ptr %d2, align 2 + store half %t3, ptr %d3, align 2 + store half %t4, ptr %d4, align 2 + store half %t5, ptr %d5, align 2 + store half %t6, ptr %d6, align 2 + store half %t7, ptr %d7, align 2 + store half %t8, ptr %d8, align 2 + store half %t9, ptr %d9, align 2 + store half %t10, ptr %d10, align 2 + store half %t11, ptr %d11, align 2 + store half %t12, ptr %d12, align 2 + store half %t13, ptr %d13, align 2 + store half %t14, ptr %d14, align 2 + store half %t15, ptr %d15, align 2 + ret void + +} + +; There is no instruction to round f64 to f16; this should not get vectorized! 
+define void @fpround_v2xf64_v2xf16(ptr %s0, ptr %d0) { +; CHECK-LABEL: define void @fpround_v2xf64_v2xf16( +; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1 +; CHECK-NEXT: [[L0:%.*]] = load double, ptr [[S0]], align 4 +; CHECK-NEXT: [[L1:%.*]] = load double, ptr [[S1]], align 4 +; CHECK-NEXT: [[T0:%.*]] = fptrunc double [[L0]] to half +; CHECK-NEXT: [[T1:%.*]] = fptrunc double [[L1]] to half +; CHECK-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 +; CHECK-NEXT: store half [[T0]], ptr [[D0]], align 2 +; CHECK-NEXT: store half [[T1]], ptr [[D1]], align 2 +; CHECK-NEXT: ret void +; +; CHECK-F16C-LABEL: define void @fpround_v2xf64_v2xf16( +; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-F16C-NEXT: [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1 +; CHECK-F16C-NEXT: [[L0:%.*]] = load double, ptr [[S0]], align 4 +; CHECK-F16C-NEXT: [[L1:%.*]] = load double, ptr [[S1]], align 4 +; CHECK-F16C-NEXT: [[T0:%.*]] = fptrunc double [[L0]] to half +; CHECK-F16C-NEXT: [[T1:%.*]] = fptrunc double [[L1]] to half +; CHECK-F16C-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 +; CHECK-F16C-NEXT: store half [[T0]], ptr [[D0]], align 2 +; CHECK-F16C-NEXT: store half [[T1]], ptr [[D1]], align 2 +; CHECK-F16C-NEXT: ret void +; +; CHECK-AVX512-LABEL: define void @fpround_v2xf64_v2xf16( +; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { +; CHECK-AVX512-NEXT: [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1 +; CHECK-AVX512-NEXT: [[L0:%.*]] = load double, ptr [[S0]], align 4 +; CHECK-AVX512-NEXT: [[L1:%.*]] = load double, ptr [[S1]], align 4 +; CHECK-AVX512-NEXT: [[T0:%.*]] = fptrunc double [[L0]] to half +; CHECK-AVX512-NEXT: [[T1:%.*]] = fptrunc double [[L1]] to half +; CHECK-AVX512-NEXT: [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1 +; CHECK-AVX512-NEXT: store half [[T0]], ptr [[D0]], 
align 2 +; CHECK-AVX512-NEXT: store half [[T1]], ptr [[D1]], align 2 +; CHECK-AVX512-NEXT: ret void +; + %s1 = getelementptr inbounds double, ptr %s0, i64 1 + %l0 = load double, ptr %s0, align 4 + %l1 = load double, ptr %s1, align 4 + + %t0 = fptrunc double %l0 to half + %t1 = fptrunc double %l1 to half + + %d1 = getelementptr inbounds half, ptr %d0, i64 1 + store half %t0, ptr %d0, align 2 + store half %t1, ptr %d1, align 2 + ret void +} From 5ad500ca4a1cba1f39757ba2660d4e0c6e3559d3 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 25 Oct 2024 16:23:07 -0700 Subject: [PATCH 070/425] [RISCV] Coverage for a few missed vector idioms --- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 97 +++++++++++++++---- .../RISCV/rvv/fixed-vectors-mask-buildvec.ll | 23 +++++ .../CodeGen/RISCV/rvv/fold-binary-reduce.ll | 75 ++++++++++++++ 3 files changed, 175 insertions(+), 20 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 7bf47d42de3b951..ea4072f15712042 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -428,6 +428,33 @@ define void @buildvec_dominant0_v8i16(ptr %x) { ret void } +define void @buildvec_dominant0_v8i16_with_end_element(ptr %x) { +; CHECK-LABEL: buildvec_dominant0_v8i16_with_end_element: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 8 +; CHECK-NEXT: li a1, 3 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret + store <8 x i16> , ptr %x + ret void +} + +define void @buildvec_dominant0_v8i16_with_tail(ptr %x) { +; CHECK-LABEL: buildvec_dominant0_v8i16_with_tail: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, %hi(.LCPI35_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI35_0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: ret 
+ store <8 x i16> , ptr %x + ret void +} + + define void @buildvec_dominant1_v8i16(ptr %x) { ; CHECK-LABEL: buildvec_dominant1_v8i16: ; CHECK: # %bb.0: @@ -494,8 +521,8 @@ define <2 x i8> @buildvec_dominant2_v2i8() { define void @buildvec_dominant0_v2i32(ptr %x) { ; RV32-LABEL: buildvec_dominant0_v2i32: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI38_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI38_0) +; RV32-NEXT: lui a1, %hi(.LCPI40_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI40_0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: vse32.v v8, (a0) @@ -503,8 +530,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) { ; ; RV64V-LABEL: buildvec_dominant0_v2i32: ; RV64V: # %bb.0: -; RV64V-NEXT: lui a1, %hi(.LCPI38_0) -; RV64V-NEXT: ld a1, %lo(.LCPI38_0)(a1) +; RV64V-NEXT: lui a1, %hi(.LCPI40_0) +; RV64V-NEXT: ld a1, %lo(.LCPI40_0)(a1) ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.i v8, -1 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma @@ -514,8 +541,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) { ; ; RV64ZVE32-LABEL: buildvec_dominant0_v2i32: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0) -; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1) +; RV64ZVE32-NEXT: lui a1, %hi(.LCPI40_0) +; RV64ZVE32-NEXT: ld a1, %lo(.LCPI40_0)(a1) ; RV64ZVE32-NEXT: li a2, -1 ; RV64ZVE32-NEXT: sd a1, 0(a0) ; RV64ZVE32-NEXT: sd a2, 8(a0) @@ -527,8 +554,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) { define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize { ; RV32-LABEL: buildvec_dominant1_optsize_v2i32: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI39_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI39_0) +; RV32-NEXT: lui a1, %hi(.LCPI41_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI41_0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: vse32.v v8, (a0) @@ -536,8 +563,8 @@ define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize { ; ; RV64V-LABEL: buildvec_dominant1_optsize_v2i32: 
; RV64V: # %bb.0: -; RV64V-NEXT: lui a1, %hi(.LCPI39_0) -; RV64V-NEXT: addi a1, a1, %lo(.LCPI39_0) +; RV64V-NEXT: lui a1, %hi(.LCPI41_0) +; RV64V-NEXT: addi a1, a1, %lo(.LCPI41_0) ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vle64.v v8, (a1) ; RV64V-NEXT: vse64.v v8, (a0) @@ -545,8 +572,8 @@ define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize { ; ; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0) -; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1) +; RV64ZVE32-NEXT: lui a1, %hi(.LCPI41_0) +; RV64ZVE32-NEXT: ld a1, %lo(.LCPI41_0)(a1) ; RV64ZVE32-NEXT: li a2, -1 ; RV64ZVE32-NEXT: sd a1, 0(a0) ; RV64ZVE32-NEXT: sd a2, 8(a0) @@ -604,8 +631,8 @@ define void @buildvec_seq_v8i8_v2i32(ptr %x) { define void @buildvec_seq_v16i8_v2i64(ptr %x) { ; RV32-LABEL: buildvec_seq_v16i8_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: lui a1, %hi(.LCPI42_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI42_0) +; RV32-NEXT: lui a1, %hi(.LCPI44_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI44_0) ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a1) ; RV32-NEXT: vse8.v v8, (a0) @@ -613,8 +640,8 @@ define void @buildvec_seq_v16i8_v2i64(ptr %x) { ; ; RV64V-LABEL: buildvec_seq_v16i8_v2i64: ; RV64V: # %bb.0: -; RV64V-NEXT: lui a1, %hi(.LCPI42_0) -; RV64V-NEXT: ld a1, %lo(.LCPI42_0)(a1) +; RV64V-NEXT: lui a1, %hi(.LCPI44_0) +; RV64V-NEXT: ld a1, %lo(.LCPI44_0)(a1) ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma @@ -623,8 +650,8 @@ define void @buildvec_seq_v16i8_v2i64(ptr %x) { ; ; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: lui a1, %hi(.LCPI42_0) -; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI42_0) +; RV64ZVE32-NEXT: lui a1, %hi(.LCPI44_0) +; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI44_0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vle8.v v8, (a1) ; RV64ZVE32-NEXT: vse8.v v8, (a0) @@ 
-656,8 +683,8 @@ define void @buildvec_seq2_v16i8_v2i64(ptr %x) { ; ; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64: ; RV64ZVE32: # %bb.0: -; RV64ZVE32-NEXT: lui a1, %hi(.LCPI43_0) -; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI43_0) +; RV64ZVE32-NEXT: lui a1, %hi(.LCPI45_0) +; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI45_0) ; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vle8.v v8, (a1) ; RV64ZVE32-NEXT: vse8.v v8, (a0) @@ -3384,3 +3411,33 @@ define <1 x i32> @buildvec_v1i32_pack(i32 %e1) { ret <1 x i32> %v1 } +define <4 x i32> @buildvec_vslide1up(i32 %e1, i32 %e2) { +; CHECK-LABEL: buildvec_vslide1up: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: ret + %v1 = insertelement <4 x i32> poison, i32 %e2, i32 0 + %v2 = insertelement <4 x i32> %v1, i32 %e1, i32 1 + %v3 = insertelement <4 x i32> %v2, i32 %e1, i32 2 + %v4 = insertelement <4 x i32> %v3, i32 %e1, i32 3 + ret <4 x i32> %v4 +} + +define <4 x i1> @buildvec_i1_splat(i1 %e1) { +; CHECK-LABEL: buildvec_i1_splat: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret + %v1 = insertelement <4 x i1> poison, i1 %e1, i32 0 + %v2 = insertelement <4 x i1> %v1, i1 %e1, i32 1 + %v3 = insertelement <4 x i1> %v2, i1 %e1, i32 2 + %v4 = insertelement <4 x i1> %v3, i1 %e1, i32 3 + ret <4 x i1> %v4 +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index 47cbb2509441ad6..5b9af1a3cfe233a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -566,3 +566,26 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize { ; ZVE32F-NEXT: ret ret <128 x i1> } + +define <4 x i1> 
@buildvec_mask_splat(i1 %e1) { +; CHECK-LABEL: buildvec_mask_splat: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: ret +; +; ZVE32F-LABEL: buildvec_mask_splat: +; ZVE32F: # %bb.0: +; ZVE32F-NEXT: andi a0, a0, 1 +; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32F-NEXT: vmv.v.x v8, a0 +; ZVE32F-NEXT: vmsne.vi v0, v8, 0 +; ZVE32F-NEXT: ret + %v1 = insertelement <4 x i1> poison, i1 %e1, i32 0 + %v2 = insertelement <4 x i1> %v1, i1 %e1, i32 1 + %v3 = insertelement <4 x i1> %v2, i1 %e1, i32 2 + %v4 = insertelement <4 x i1> %v3, i1 %e1, i32 3 + ret <4 x i1> %v4 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll index 351c0bab9dca893..adfae5ede7bb59a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll @@ -366,3 +366,78 @@ entry: ret void } declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) + +define i64 @op_then_reduce(<4 x i64> %v, <4 x i64> %v2) { +; CHECK-LABEL: op_then_reduce: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + %rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v) + %rdx2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v2) + %res = add i64 %rdx1, %rdx2 + ret i64 %res +} + + +define i64 @two_reduce_scalar_bypass(<4 x i64> %v, <4 x i64> %v2) { +; CHECK-LABEL: two_reduce_scalar_bypass: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.s.x v12, zero +; CHECK-NEXT: vredxor.vs v8, v8, v12 +; CHECK-NEXT: vredsum.vs v8, v10, v8 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + %rdx1 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v) + 
%rdx2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v2) + %res = add i64 %rdx1, %rdx2 + ret i64 %res +} + +define i64 @two_reduce_scalar_bypass_zext(<4 x i64> %v, <4 x i32> %v2) { +; CHECK-LABEL: two_reduce_scalar_bypass_zext: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v11, zero +; CHECK-NEXT: vredsum.vs v10, v10, v11 +; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: srli a0, a0, 32 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + %rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v) + %rdx2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v2) + %rdx2.zext = zext i32 %rdx2 to i64 + %res = add i64 %rdx1, %rdx2.zext + ret i64 %res +} + +define i64 @two_reduce_scalar_bypass_sext(<4 x i64> %v, <4 x i32> %v2) { +; CHECK-LABEL: two_reduce_scalar_bypass_sext: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v11, zero +; CHECK-NEXT: vredsum.vs v10, v10, v11 +; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: ret +entry: + %rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v) + %rdx2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v2) + %rdx2.zext = sext i32 %rdx2 to i64 + %res = add i64 %rdx1, %rdx2.zext + ret i64 %res +} From cae351f3453a0a26ec8eb2ddaf773c24a29d929e Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Sat, 26 Oct 2024 07:38:52 +0800 Subject: [PATCH 071/425] [libc++] Bump `__cpp_lib_optional` and `__cpp_lib_variant` (#113650) In C++20 mode, `__cpp_lib_optional` and `__cpp_lib_variant` should be `202106L` due to DR P2231R1. In C++26 mode, `__cpp_lib_variant` should be bumped to `202306L` due to P2637R3. 
- Clang 16/17 shouldn't get this bumping (as member `visit` requires explicit object parameters), but it's very tricky to make the bumping conditionally enabled. I _hope_ unconditionally bumping in C++26 will be OK for LLVM 20 when the support for Clang 17 is dropped. Related PRs: - https://reviews.llvm.org/D102119 - #83335 - #76447 --- libcxx/docs/FeatureTestMacroTable.rst | 6 ++++++ libcxx/docs/Status/Cxx20Papers.csv | 2 +- libcxx/docs/Status/Cxx2cPapers.csv | 2 +- libcxx/include/version | 11 ++++++++++- .../optional.version.compile.pass.cpp | 5 +++-- .../variant.version.compile.pass.cpp | 14 ++++++++------ .../version.version.compile.pass.cpp | 19 +++++++++++-------- .../generate_feature_test_macro_components.py | 10 +++++++--- 8 files changed, 47 insertions(+), 22 deletions(-) diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 05b08da52153501..db24b65caca6c0a 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -264,6 +264,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_move_iterator_concept`` ``202207L`` ---------------------------------------------------------- ----------------- + ``__cpp_lib_optional`` ``202106L`` + ---------------------------------------------------------- ----------------- ``__cpp_lib_polymorphic_allocator`` ``201902L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_ranges`` ``202110L`` @@ -300,6 +302,8 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_unwrap_ref`` ``201811L`` ---------------------------------------------------------- ----------------- + ``__cpp_lib_variant`` ``202106L`` + ---------------------------------------------------------- ----------------- **C++23** ---------------------------------------------------------------------------- ``__cpp_lib_adaptor_iterator_pair_constructor`` ``202106L`` 
@@ -491,5 +495,7 @@ Status ``__cpp_lib_to_string`` *unimplemented* ---------------------------------------------------------- ----------------- ``__cpp_lib_tuple_like`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_variant`` ``202306L`` ========================================================== ================= diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv index cc75d28f14aac20..9a057be8ad0519c 100644 --- a/libcxx/docs/Status/Cxx20Papers.csv +++ b/libcxx/docs/Status/Cxx20Papers.csv @@ -192,7 +192,7 @@ "`P2106R0 `__","Alternative wording for GB315 and GB316","2020-02 (Prague)","|Complete|","15.0","" "`P2116R0 `__","Remove tuple-like protocol support from fixed-extent span","2020-02 (Prague)","|Complete|","11.0","" "","","","","","" -"`P2231R1 `__","Missing constexpr in std::optional and std::variant","2021-06 (Virtual)","|Complete|","19.0","" +"`P2231R1 `__","Missing constexpr in std::optional and std::variant","2021-06 (Virtual)","|Complete|","19.0","Changes of feature-test macros are completed in LLVM 20." 
"`P2325R3 `__","Views should not be required to be default constructible","2021-06 (Virtual)","|Complete|","16.0","" "`P2210R2 `__","Superior String Splitting","2021-06 (Virtual)","|Complete|","16.0","" "`P2216R3 `__","std::format improvements","2021-06 (Virtual)","|Complete|","15.0","" diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index 8864b1ebe288910..d5d5cdda065ae10 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -17,7 +17,7 @@ "`P0792R14 `__","``function_ref``: a type-erased callable reference","2023-06 (Varna)","","","" "`P2874R2 `__","Mandating Annex D Require No More","2023-06 (Varna)","","","" "`P2757R3 `__","Type-checking format args","2023-06 (Varna)","","","" -"`P2637R3 `__","Member ``visit``","2023-06 (Varna)","|Complete|","19.0","" +"`P2637R3 `__","Member ``visit``","2023-06 (Varna)","|Complete|","19.0","Change of ``__cpp_lib_variant`` is completed in LLVM 20. Change of ``__cpp_lib_format`` is blocked by `P2419R2 `__." 
"`P2641R4 `__","Checking if a ``union`` alternative is active","2023-06 (Varna)","","","" "`P1759R6 `__","Native handles and file streams","2023-06 (Varna)","|Complete|","18.0","" "`P2697R1 `__","Interfacing ``bitset`` with ``string_view``","2023-06 (Varna)","|Complete|","18.0","" diff --git a/libcxx/include/version b/libcxx/include/version index 5ab4f28a04d880e..cb75f3b2db681c7 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -174,6 +174,7 @@ __cpp_lib_nonmember_container_access 201411L __cpp_lib_not_fn 201603L __cpp_lib_null_iterators 201304L __cpp_lib_optional 202110L + 202106L // C++20 201606L // C++17 __cpp_lib_optional_range_support 202406L __cpp_lib_out_ptr 202311L @@ -261,7 +262,9 @@ __cpp_lib_uncaught_exceptions 201411L __cpp_lib_unordered_map_try_emplace 201411L __cpp_lib_unreachable 202202L __cpp_lib_unwrap_ref 201811L -__cpp_lib_variant 202102L +__cpp_lib_variant 202306L + 202106L // C++20 + 202102L // C++17 __cpp_lib_void_t 201411L */ @@ -427,6 +430,8 @@ __cpp_lib_void_t 201411L # define __cpp_lib_list_remove_return_type 201806L # define __cpp_lib_math_constants 201907L # define __cpp_lib_move_iterator_concept 202207L +# undef __cpp_lib_optional +# define __cpp_lib_optional 202106L # if _LIBCPP_AVAILABILITY_HAS_PMR # define __cpp_lib_polymorphic_allocator 201902L # endif @@ -453,6 +458,8 @@ __cpp_lib_void_t 201411L # define __cpp_lib_to_array 201907L # define __cpp_lib_type_identity 201806L # define __cpp_lib_unwrap_ref 201811L +# undef __cpp_lib_variant +# define __cpp_lib_variant 202106L #endif #if _LIBCPP_STD_VER >= 23 @@ -570,6 +577,8 @@ __cpp_lib_void_t 201411L // # define __cpp_lib_to_string 202306L # undef __cpp_lib_tuple_like // # define __cpp_lib_tuple_like 202311L +# undef __cpp_lib_variant +# define __cpp_lib_variant 202306L #endif // clang-format on diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp 
b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp index f265be091f79b5f..91abbbc77837bd9 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp @@ -19,6 +19,7 @@ __cpp_lib_constrained_equality 202403L [C++26] __cpp_lib_freestanding_optional 202311L [C++26] __cpp_lib_optional 201606L [C++17] + 202106L [C++20] 202110L [C++23] __cpp_lib_optional_range_support 202406L [C++26] */ @@ -96,8 +97,8 @@ # ifndef __cpp_lib_optional # error "__cpp_lib_optional should be defined in c++20" # endif -# if __cpp_lib_optional != 201606L -# error "__cpp_lib_optional should have the value 201606L in c++20" +# if __cpp_lib_optional != 202106L +# error "__cpp_lib_optional should have the value 202106L in c++20" # endif # ifdef __cpp_lib_optional_range_support diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp index 4dcc477696bfdde..598e976bda3cf60 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp @@ -19,6 +19,8 @@ __cpp_lib_constrained_equality 202403L [C++26] __cpp_lib_freestanding_variant 202311L [C++26] __cpp_lib_variant 202102L [C++17] + 202106L [C++20] + 202306L [C++26] */ #include @@ -82,8 +84,8 @@ # ifndef __cpp_lib_variant # error "__cpp_lib_variant should be defined in c++20" # endif -# if __cpp_lib_variant != 202102L -# error "__cpp_lib_variant should have the value 202102L in c++20" +# if __cpp_lib_variant != 202106L +# error "__cpp_lib_variant should have the value 202106L in c++20" # endif #elif 
TEST_STD_VER == 23 @@ -99,8 +101,8 @@ # ifndef __cpp_lib_variant # error "__cpp_lib_variant should be defined in c++23" # endif -# if __cpp_lib_variant != 202102L -# error "__cpp_lib_variant should have the value 202102L in c++23" +# if __cpp_lib_variant != 202106L +# error "__cpp_lib_variant should have the value 202106L in c++23" # endif #elif TEST_STD_VER > 23 @@ -134,8 +136,8 @@ # ifndef __cpp_lib_variant # error "__cpp_lib_variant should be defined in c++26" # endif -# if __cpp_lib_variant != 202102L -# error "__cpp_lib_variant should have the value 202102L in c++26" +# if __cpp_lib_variant != 202306L +# error "__cpp_lib_variant should have the value 202306L in c++26" # endif #endif // TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 0614f64a2ef04d6..5deaee16895f66c 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -158,6 +158,7 @@ __cpp_lib_not_fn 201603L [C++17] __cpp_lib_null_iterators 201304L [C++14] __cpp_lib_optional 201606L [C++17] + 202106L [C++20] 202110L [C++23] __cpp_lib_optional_range_support 202406L [C++26] __cpp_lib_out_ptr 202106L [C++23] @@ -244,6 +245,8 @@ __cpp_lib_unreachable 202202L [C++23] __cpp_lib_unwrap_ref 201811L [C++20] __cpp_lib_variant 202102L [C++17] + 202106L [C++20] + 202306L [C++26] __cpp_lib_void_t 201411L [C++17] */ @@ -4087,8 +4090,8 @@ # ifndef __cpp_lib_optional # error "__cpp_lib_optional should be defined in c++20" # endif -# if __cpp_lib_optional != 201606L -# error "__cpp_lib_optional should have the value 201606L in c++20" +# if __cpp_lib_optional != 202106L +# error "__cpp_lib_optional should have the value 202106L in c++20" # endif # ifdef 
__cpp_lib_optional_range_support @@ -4569,8 +4572,8 @@ # ifndef __cpp_lib_variant # error "__cpp_lib_variant should be defined in c++20" # endif -# if __cpp_lib_variant != 202102L -# error "__cpp_lib_variant should have the value 202102L in c++20" +# if __cpp_lib_variant != 202106L +# error "__cpp_lib_variant should have the value 202106L in c++20" # endif # ifndef __cpp_lib_void_t @@ -6196,8 +6199,8 @@ # ifndef __cpp_lib_variant # error "__cpp_lib_variant should be defined in c++23" # endif -# if __cpp_lib_variant != 202102L -# error "__cpp_lib_variant should have the value 202102L in c++23" +# if __cpp_lib_variant != 202106L +# error "__cpp_lib_variant should have the value 202106L in c++23" # endif # ifndef __cpp_lib_void_t @@ -8141,8 +8144,8 @@ # ifndef __cpp_lib_variant # error "__cpp_lib_variant should be defined in c++26" # endif -# if __cpp_lib_variant != 202102L -# error "__cpp_lib_variant should have the value 202102L in c++26" +# if __cpp_lib_variant != 202306L +# error "__cpp_lib_variant should have the value 202306L in c++26" # endif # ifndef __cpp_lib_void_t diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 7ab1af93d177406..197d6bbc692226c 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -942,7 +942,11 @@ def add_version_header(tc): }, { "name": "__cpp_lib_optional", - "values": {"c++17": 201606, "c++23": 202110}, + "values": { + "c++17": 201606, + "c++20": 202106, # P2231R1 Missing constexpr in std::optional and std::variant + "c++23": 202110, # P0798R8 Monadic operations for std::optional + LWG3621 Remove feature-test macro __cpp_lib_monadic_optional + }, "headers": ["optional"], }, { @@ -1406,8 +1410,8 @@ def add_version_header(tc): "name": "__cpp_lib_variant", "values": { "c++17": 202102, # std::visit for classes derived from std::variant - # "c++20": 202106, # Fully constexpr 
std::variant - # "c++26": 202306, # Member visit (implemented) + "c++20": 202106, # P2231R1 Missing constexpr in std::optional and std::variant + "c++26": 202306, # P2637R3 Member visit }, "headers": ["variant"], }, From 5f7bad07b9d5b6c5cfa8c16a4e62cf1e128725be Mon Sep 17 00:00:00 2001 From: B I Mohammed Abbas Date: Sat, 26 Oct 2024 05:35:23 +0530 Subject: [PATCH 072/425] Add extendhfxf2 into compiler rt (#111099) Retry pr #109090 with updated extendhfxf2 test --- compiler-rt/lib/builtins/CMakeLists.txt | 1 + compiler-rt/lib/builtins/extendhfxf2.c | 18 +++++ .../lib/builtins/macho_embedded/common.txt | 1 + .../test/builtins/Unit/extendhfxf2_test.c | 71 +++++++++++++++++++ .../compiler-rt/lib/builtins/BUILD.gn | 1 + 5 files changed, 92 insertions(+) create mode 100644 compiler-rt/lib/builtins/extendhfxf2.c create mode 100644 compiler-rt/test/builtins/Unit/extendhfxf2_test.c diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 9a0a50ee7003f19..97a9e508d37a325 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -104,6 +104,7 @@ set(GENERIC_SOURCES divti3.c extendsfdf2.c extendhfsf2.c + extendhfxf2.c ffsdi2.c ffssi2.c ffsti2.c diff --git a/compiler-rt/lib/builtins/extendhfxf2.c b/compiler-rt/lib/builtins/extendhfxf2.c new file mode 100644 index 000000000000000..7425859f79f763a --- /dev/null +++ b/compiler-rt/lib/builtins/extendhfxf2.c @@ -0,0 +1,18 @@ +//===-- lib/extendhfxf2.c - half -> long double conversion --------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SRC_HALF +#define DST_DOUBLE +#include "fp_extend_impl.inc" + +// Use a forwarding definition and noinline to implement a poor man's alias, +// as there isn't a good cross-platform way of defining one. +// Long double is expected to be at least as precise as double. +COMPILER_RT_ABI NOINLINE long double __extendhfxf2(src_t a) { + return (long double)__extendXfYf2__(a); +} diff --git a/compiler-rt/lib/builtins/macho_embedded/common.txt b/compiler-rt/lib/builtins/macho_embedded/common.txt index 819109768f52989..fa99bc239e68f28 100644 --- a/compiler-rt/lib/builtins/macho_embedded/common.txt +++ b/compiler-rt/lib/builtins/macho_embedded/common.txt @@ -60,6 +60,7 @@ divsf3 divsi3 extendsfdf2 extendhfsf2 +extendhfxf2 ffssi2 fixdfsi fixsfsi diff --git a/compiler-rt/test/builtins/Unit/extendhfxf2_test.c b/compiler-rt/test/builtins/Unit/extendhfxf2_test.c new file mode 100644 index 000000000000000..9972b024ab415e3 --- /dev/null +++ b/compiler-rt/test/builtins/Unit/extendhfxf2_test.c @@ -0,0 +1,71 @@ +// RUN: %clang_builtins %s %librt -o %t && %run %t +// REQUIRES: librt_has_extendhfxf2 + +#include <limits.h> +#include <math.h> // for isnan, isinf +#include <stdio.h> + +#if __LDBL_MANT_DIG__ >= 64 && defined(COMPILER_RT_HAS_FLOAT16) + +long double __extendhfxf2(_Float16 f); + +int test_extendhfxf2(_Float16 a, long double expected) { + long double x = __extendhfxf2(a); + __uint16_t *b = (void *)&a; + int ret = !((isnan(x) && isnan(expected)) || x == expected); + if (ret) { + printf("error in test__extendhfxf2(%#.4x) = %.20Lf, " + "expected %.20Lf\n", + *b, x, expected); + } + return ret; +} + +char assumption_1[sizeof(_Float16) * CHAR_BIT == 16] = {0}; + +int main() { + // Small positive value + if (test_extendhfxf2(0.09997558593750000000f, 0.09997558593750000000L)) + return 1; + + // Small negative value + if (test_extendhfxf2(-0.09997558593750000000f,
-0.09997558593750000000L)) + return 1; + + // Zero + if (test_extendhfxf2(0.0f, 0.0L)) + return 1; + + // Smallest positive non-zero value + if (test_extendhfxf2(0x1p-16f, 0x1p-16L)) + return 1; + + // Smallest negative non-zero value + if (test_extendhfxf2(-0x1p-16f, -0x1p-16L)) + return 1; + + // Positive infinity + if (test_extendhfxf2(__builtin_huge_valf16(), __builtin_huge_valf64x())) + return 1; + + // Negative infinity + if (test_extendhfxf2(-__builtin_huge_valf16(), + (long double)-__builtin_huge_valf64x())) + return 1; + + // NaN + if (test_extendhfxf2(__builtin_nanf16(""), + (long double)__builtin_nanf64x(""))) + return 1; + + return 0; +} + +#else + +int main() { + printf("skipped\n"); + return 0; +} + +#endif diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index 8904aed28229f15..efbf01960bf907f 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -126,6 +126,7 @@ static_library("builtins") { "divsi3.c", "divti3.c", "extendhfsf2.c", + "extendhfxf2.c", "extendsfdf2.c", "ffsdi2.c", "ffssi2.c", From 889b67c9d30e3024a1317431d66c22599f6c2011 Mon Sep 17 00:00:00 2001 From: donald chen Date: Sat, 26 Oct 2024 08:07:51 +0800 Subject: [PATCH 073/425] [mlir] [memref] add more checks to the memref.reinterpret_cast (#112669) The memref.reinterpret_cast operation previously accepted input like: %out = memref.reinterpret_cast %in to offset: [%offset], sizes: [10], strides: [1] : memref<?xf32> to memref<10xf32> A problem arises: while lowering, the true offset of %out is %offset, but its data type indicates an offset of 0. Permitting this inconsistency can result in incorrect outcomes, as certain passes might erroneously extract the offset from the data type of %out. This patch fixes this by enforcing that the return value's data type aligns with the input parameter.
--- .../GPU/Transforms/DecomposeMemRefs.cpp | 13 ++++++++- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 27 ++++++++++--------- .../Dialect/MemRef/Transforms/ExpandOps.cpp | 22 ++++++++++++--- .../Transforms/ExpandStridedMetadata.cpp | 17 +++++------- .../expand-then-convert-to-llvm.mlir | 10 ++----- mlir/test/Dialect/GPU/decompose-memrefs.mlir | 12 ++++----- mlir/test/Dialect/MemRef/expand-ops.mlir | 13 +++++---- .../MemRef/expand-strided-metadata.mlir | 21 +++++---------- mlir/test/Dialect/MemRef/invalid.mlir | 9 +++++++ 9 files changed, 81 insertions(+), 63 deletions(-) diff --git a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp index 2b2d10a7733eceb..004d73a77e53599 100644 --- a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp @@ -29,6 +29,17 @@ namespace mlir { using namespace mlir; +static MemRefType inferCastResultType(Value source, OpFoldResult offset) { + auto sourceType = cast(source.getType()); + SmallVector staticOffsets; + SmallVector dynamicOffsets; + dispatchIndexOpFoldResults(offset, dynamicOffsets, staticOffsets); + auto stridedLayout = + StridedLayoutAttr::get(source.getContext(), staticOffsets.front(), {}); + return MemRefType::get({}, sourceType.getElementType(), stridedLayout, + sourceType.getMemorySpace()); +} + static void setInsertionPointToStart(OpBuilder &builder, Value val) { if (auto *parentOp = val.getDefiningOp()) { builder.setInsertionPointAfter(parentOp); @@ -98,7 +109,7 @@ static Value getFlatMemref(OpBuilder &rewriter, Location loc, Value source, SmallVector offsetsTemp = getAsOpFoldResult(offsets); auto &&[base, offset, ignore] = getFlatOffsetAndStrides(rewriter, loc, source, offsetsTemp); - auto retType = cast(base.getType()); + MemRefType retType = inferCastResultType(base, offset); return rewriter.create(loc, retType, base, offset, std::nullopt, std::nullopt); } diff --git 
a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index d579a27359dfa01..2219505c9b802f6 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -1892,11 +1892,12 @@ LogicalResult ReinterpretCastOp::verify() { // Match sizes in result memref type and in static_sizes attribute. for (auto [idx, resultSize, expectedSize] : llvm::enumerate(resultType.getShape(), getStaticSizes())) { - if (!ShapedType::isDynamic(resultSize) && - !ShapedType::isDynamic(expectedSize) && resultSize != expectedSize) + if (!ShapedType::isDynamic(resultSize) && resultSize != expectedSize) return emitError("expected result type with size = ") - << expectedSize << " instead of " << resultSize - << " in dim = " << idx; + << (ShapedType::isDynamic(expectedSize) + ? std::string("dynamic") + : std::to_string(expectedSize)) + << " instead of " << resultSize << " in dim = " << idx; } // Match offset and strides in static_offset and static_strides attributes. If @@ -1910,20 +1911,22 @@ LogicalResult ReinterpretCastOp::verify() { // Match offset in result memref type and in static_offsets attribute. int64_t expectedOffset = getStaticOffsets().front(); - if (!ShapedType::isDynamic(resultOffset) && - !ShapedType::isDynamic(expectedOffset) && resultOffset != expectedOffset) + if (!ShapedType::isDynamic(resultOffset) && resultOffset != expectedOffset) return emitError("expected result type with offset = ") - << expectedOffset << " instead of " << resultOffset; + << (ShapedType::isDynamic(expectedOffset) + ? std::string("dynamic") + : std::to_string(expectedOffset)) + << " instead of " << resultOffset; // Match strides in result memref type and in static_strides attribute. 
for (auto [idx, resultStride, expectedStride] : llvm::enumerate(resultStrides, getStaticStrides())) { - if (!ShapedType::isDynamic(resultStride) && - !ShapedType::isDynamic(expectedStride) && - resultStride != expectedStride) + if (!ShapedType::isDynamic(resultStride) && resultStride != expectedStride) return emitError("expected result type with stride = ") - << expectedStride << " instead of " << resultStride - << " in dim = " << idx; + << (ShapedType::isDynamic(expectedStride) + ? std::string("dynamic") + : std::to_string(expectedStride)) + << " instead of " << resultStride << " in dim = " << idx; } return success(); diff --git a/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp index faba12f5bf82f89..83683c7e617bf80 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp @@ -89,7 +89,8 @@ struct MemRefReshapeOpConverter : public OpRewritePattern { strides.resize(rank); Location loc = op.getLoc(); - Value stride = rewriter.create(loc, 1); + Value stride = nullptr; + int64_t staticStride = 1; for (int i = rank - 1; i >= 0; --i) { Value size; // Load dynamic sizes from the shape input, use constants for static dims. 
@@ -105,9 +106,22 @@ struct MemRefReshapeOpConverter : public OpRewritePattern { size = rewriter.create(loc, sizeAttr); sizes[i] = sizeAttr; } - strides[i] = stride; - if (i > 0) - stride = rewriter.create(loc, stride, size); + if (stride) + strides[i] = stride; + else + strides[i] = rewriter.getIndexAttr(staticStride); + + if (i > 0) { + if (stride) { + stride = rewriter.create(loc, stride, size); + } else if (op.getType().isDynamicDim(i)) { + stride = rewriter.create( + loc, rewriter.create(loc, staticStride), + size); + } else { + staticStride *= op.getType().getDimSize(i); + } + } } rewriter.replaceOpWithNewOp( op, op.getType(), op.getSource(), /*offset=*/rewriter.getIndexAttr(0), diff --git a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp index a2049ba4a4924d2..087d1fcc2b23ae4 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp @@ -507,6 +507,8 @@ getCollapsedStride(memref::CollapseShapeOp collapseShape, OpBuilder &builder, SmallVector groupStrides; ArrayRef srcShape = sourceType.getShape(); + + OpFoldResult lastValidStride = nullptr; for (int64_t currentDim : reassocGroup) { // Skip size-of-1 dimensions, since right now their strides may be // meaningless. @@ -517,11 +519,11 @@ getCollapsedStride(memref::CollapseShapeOp collapseShape, OpBuilder &builder, continue; int64_t currentStride = strides[currentDim]; - groupStrides.push_back(ShapedType::isDynamic(currentStride) - ? origStrides[currentDim] - : builder.getIndexAttr(currentStride)); + lastValidStride = ShapedType::isDynamic(currentStride) + ? origStrides[currentDim] + : builder.getIndexAttr(currentStride); } - if (groupStrides.empty()) { + if (!lastValidStride) { // We're dealing with a 1x1x...x1 shape. The stride is meaningless, // but we still have to make the type system happy. 
MemRefType collapsedType = collapseShape.getResultType(); @@ -543,12 +545,7 @@ getCollapsedStride(memref::CollapseShapeOp collapseShape, OpBuilder &builder, return {builder.getIndexAttr(finalStride)}; } - // For the general case, we just want the minimum stride - // since the collapsed dimensions are contiguous. - auto minMap = AffineMap::getMultiDimIdentityMap(groupStrides.size(), - builder.getContext()); - return {makeComposedFoldedAffineMin(builder, collapseShape.getLoc(), minMap, - groupStrides)}; + return {lastValidStride}; } /// From `reshape_like(memref, subSizes, subStrides))` compute diff --git a/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir index 55b1bc9c545a855..ec5ceae57ccb33c 100644 --- a/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir @@ -425,8 +425,6 @@ func.func @collapse_shape_dynamic_with_non_identity_layout( // CHECK: %[[SIZE1:.*]] = llvm.extractvalue %[[MEM]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[SIZE2:.*]] = llvm.extractvalue %[[MEM]][3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEM]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: %[[STRIDE0_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[STRIDE0]] : i64 to index -// CHECK: %[[STRIDE0:.*]] = builtin.unrealized_conversion_cast %[[STRIDE0_TO_IDX]] : index to i64 // CHECK: %[[FINAL_SIZE1:.*]] = llvm.mul %[[SIZE1]], %[[SIZE2]] : i64 // CHECK: %[[SIZE1_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[FINAL_SIZE1]] : i64 to index // CHECK: %[[FINAL_SIZE1:.*]] = builtin.unrealized_conversion_cast %[[SIZE1_TO_IDX]] : index to i64 @@ -548,23 +546,19 @@ func.func @collapse_shape_dynamic(%arg0 : memref<1x2x?xf32>) -> memref<1x?xf32> // CHECK: %[[C0:.*]] = 
llvm.mlir.constant(0 : index) : i64 // CHECK: %[[SIZE2:.*]] = llvm.extractvalue %[[MEM]][3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEM]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> -// CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEM]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64 // CHECK: %[[FINAL_SIZE1:.*]] = llvm.mul %[[SIZE2]], %[[C2]] : i64 // CHECK: %[[SIZE1_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[FINAL_SIZE1]] : i64 to index // CHECK: %[[FINAL_SIZE1:.*]] = builtin.unrealized_conversion_cast %[[SIZE1_TO_IDX]] : index to i64 -// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[MIN_STRIDE1:.*]] = llvm.intr.smin(%[[STRIDE1]], %[[C1]]) : (i64, i64) -> i64 -// CHECK: %[[MIN_STRIDE1_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[MIN_STRIDE1]] : i64 to index -// CHECK: %[[MIN_STRIDE1:.*]] = builtin.unrealized_conversion_cast %[[MIN_STRIDE1_TO_IDX]] : index to i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[BASE_BUFFER]], %[[DESC]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[DESC1:.*]] = llvm.insertvalue %[[ALIGNED_BUFFER]], %[[DESC0]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[C0]], %[[DESC1]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[C1]], %[[DESC2]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[STRIDE0]], %[[DESC3]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[DESC5:.*]] = llvm.insertvalue 
%[[FINAL_SIZE1]], %[[DESC4]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> -// CHECK: %[[DESC6:.*]] = llvm.insertvalue %[[MIN_STRIDE1]], %[[DESC5]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> +// CHECK: %[[DESC6:.*]] = llvm.insertvalue %[[C1]], %[[DESC5]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> // CHECK: %[[RES:.*]] = builtin.unrealized_conversion_cast %[[DESC6]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<1x?xf32> // CHECK: return %[[RES]] : memref<1x?xf32> // CHECK: } diff --git a/mlir/test/Dialect/GPU/decompose-memrefs.mlir b/mlir/test/Dialect/GPU/decompose-memrefs.mlir index 56fc9a66b7ace71..1a1922194845170 100644 --- a/mlir/test/Dialect/GPU/decompose-memrefs.mlir +++ b/mlir/test/Dialect/GPU/decompose-memrefs.mlir @@ -7,8 +7,8 @@ // CHECK: gpu.launch // CHECK-SAME: threads(%[[TX:.*]], %[[TY:.*]], %[[TZ:.*]]) in // CHECK: %[[IDX:.*]] = affine.apply #[[MAP]]()[%[[TX]], %[[STRIDES]]#0, %[[TY]], %[[STRIDES]]#1, %[[TZ]]] -// CHECK: %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref to memref -// CHECK: memref.store %[[VAL]], %[[PTR]][] : memref +// CHECK: %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref to memref> +// CHECK: memref.store %[[VAL]], %[[PTR]][] : memref> func.func @decompose_store(%arg0 : f32, %arg1 : memref) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -33,8 +33,8 @@ func.func @decompose_store(%arg0 : f32, %arg1 : memref) { // CHECK: gpu.launch // CHECK-SAME: threads(%[[TX:.*]], %[[TY:.*]], %[[TZ:.*]]) in // CHECK: %[[IDX:.*]] = affine.apply #[[MAP]]()[%[[OFFSET]], %[[TX]], %[[STRIDES]]#0, %[[TY]], %[[STRIDES]]#1, %[[TZ]], %[[STRIDES]]#2] -// CHECK: %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref to memref -// CHECK: memref.store %[[VAL]], %[[PTR]][] : memref +// 
CHECK: %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref to memref> +// CHECK: memref.store %[[VAL]], %[[PTR]][] : memref> func.func @decompose_store_strided(%arg0 : f32, %arg1 : memref>) { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -59,8 +59,8 @@ func.func @decompose_store_strided(%arg0 : f32, %arg1 : memref to memref -// CHECK: %[[RES:.*]] = memref.load %[[PTR]][] : memref +// CHECK: %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref to memref> +// CHECK: %[[RES:.*]] = memref.load %[[PTR]][] : memref> // CHECK: "test.test"(%[[RES]]) : (f32) -> () func.func @decompose_load(%arg0 : memref) { %c0 = arith.constant 0 : index diff --git a/mlir/test/Dialect/MemRef/expand-ops.mlir b/mlir/test/Dialect/MemRef/expand-ops.mlir index f958a92b751a4ab..65932b5814a668c 100644 --- a/mlir/test/Dialect/MemRef/expand-ops.mlir +++ b/mlir/test/Dialect/MemRef/expand-ops.mlir @@ -52,14 +52,13 @@ func.func @memref_reshape(%input: memref<*xf32>, // CHECK-SAME: [[SRC:%.*]]: memref<*xf32>, // CHECK-SAME: [[SHAPE:%.*]]: memref<3xi32>) -> memref { -// CHECK: [[C1:%.*]] = arith.constant 1 : index // CHECK: [[C8:%.*]] = arith.constant 8 : index -// CHECK: [[STRIDE_1:%.*]] = arith.muli [[C1]], [[C8]] : index - -// CHECK: [[C1_:%.*]] = arith.constant 1 : index -// CHECK: [[DIM_1:%.*]] = memref.load [[SHAPE]]{{\[}}[[C1_]]] : memref<3xi32> +// CHECK: [[C1:%.*]] = arith.constant 1 : index +// CHECK: [[DIM_1:%.*]] = memref.load [[SHAPE]]{{\[}}[[C1]]] : memref<3xi32> // CHECK: [[SIZE_1:%.*]] = arith.index_cast [[DIM_1]] : i32 to index -// CHECK: [[STRIDE_0:%.*]] = arith.muli [[STRIDE_1]], [[SIZE_1]] : index + +// CHECK: [[C8_:%.*]] = arith.constant 8 : index +// CHECK: [[STRIDE_0:%.*]] = arith.muli [[C8_]], [[SIZE_1]] : index // CHECK: [[C0:%.*]] = arith.constant 0 : index // CHECK: [[DIM_0:%.*]] = memref.load [[SHAPE]]{{\[}}[[C0]]] : memref<3xi32> @@ -67,5 +66,5 @@ func.func 
@memref_reshape(%input: memref<*xf32>, // CHECK: [[RESULT:%.*]] = memref.reinterpret_cast [[SRC]] // CHECK-SAME: to offset: [0], sizes: {{\[}}[[SIZE_0]], [[SIZE_1]], 8], -// CHECK-SAME: strides: {{\[}}[[STRIDE_0]], [[STRIDE_1]], [[C1]]] +// CHECK-SAME: strides: {{\[}}[[STRIDE_0]], 8, 1] // CHECK-SAME: : memref<*xf32> to memref diff --git a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir index 8aac802ba10ae9f..647731db439c080 100644 --- a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir +++ b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir @@ -931,19 +931,15 @@ func.func @extract_aligned_pointer_as_index_of_unranked_source(%arg0: memref<*xf // = min(7, 1) // = 1 // -// CHECK-DAG: #[[$STRIDE0_MIN_MAP:.*]] = affine_map<()[s0] -> (s0)> -// CHECK-DAG: #[[$SIZE0_MAP:.*]] = affine_map<()[s0, s1] -> ((s0 * s1) * 4)> -// CHECK-DAG: #[[$STRIDE1_MIN_MAP:.*]] = affine_map<()[s0, s1] -> (s0, s1, 42)> +// CHECK: #[[$SIZE0_MAP:.*]] = affine_map<()[s0, s1] -> ((s0 * s1) * 4)> // CHECK-LABEL: func @simplify_collapse( // CHECK-SAME: %[[ARG:.*]]: memref) // // CHECK: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:6, %[[STRIDES:.*]]:6 = memref.extract_strided_metadata %[[ARG]] : memref // -// CHECK-DAG: %[[DYN_STRIDE0:.*]] = affine.min #[[$STRIDE0_MIN_MAP]]()[%[[STRIDES]]#0] -// CHECK-DAG: %[[DYN_SIZE1:.*]] = affine.apply #[[$SIZE0_MAP]]()[%[[SIZES]]#1, %[[SIZES]]#3] -// CHECK-DAG: %[[DYN_STRIDE1:.*]] = affine.min #[[$STRIDE1_MIN_MAP]]()[%[[STRIDES]]#1, %[[STRIDES]]#2] +// CHECK: %[[DYN_SIZE1:.*]] = affine.apply #[[$SIZE0_MAP]]()[%[[SIZES]]#1, %[[SIZES]]#3] // -// CHECK: %[[COLLAPSE_VIEW:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [0], sizes: [%[[SIZES]]#0, %[[DYN_SIZE1]], 42], strides: [%[[DYN_STRIDE0]], %[[DYN_STRIDE1]], 1] +// CHECK: %[[COLLAPSE_VIEW:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [0], sizes: [%[[SIZES]]#0, %[[DYN_SIZE1]], 42], strides: [%[[STRIDES]]#0, 42, 1] func.func 
@simplify_collapse(%arg : memref) -> memref { @@ -1046,15 +1042,12 @@ func.func @simplify_collapse_with_dim_of_size1_and_non_1_stride // We just return the first dynamic one for this group. // // -// CHECK-DAG: #[[$STRIDE0_MIN_MAP:.*]] = affine_map<()[s0, s1] -> (s0, s1)> // CHECK-LABEL: func @simplify_collapse_with_dim_of_size1_and_resulting_dyn_stride( // CHECK-SAME: %[[ARG:.*]]: memref<2x3x1x1x1xi32, strided<[?, ?, ?, ?, 2] // // CHECK: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:5, %[[STRIDES:.*]]:5 = memref.extract_strided_metadata %[[ARG]] : memref<2x3x1x1x1xi32, strided<[?, ?, ?, ?, 2], offset: ?>> // -// CHECK-DAG: %[[DYN_STRIDE0:.*]] = affine.min #[[$STRIDE0_MIN_MAP]]()[%[[STRIDES]]#0, %[[STRIDES]]#1] -// -// CHECK: %[[COLLAPSE_VIEW:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[OFFSET]]], sizes: [6, 1], strides: [%[[DYN_STRIDE0]], %[[STRIDES]]#2] +// CHECK: %[[COLLAPSE_VIEW:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[OFFSET]]], sizes: [6, 1], strides: [%[[STRIDES]]#1, %[[STRIDES]]#2] func.func @simplify_collapse_with_dim_of_size1_and_resulting_dyn_stride (%arg0: memref<2x3x1x1x1xi32, strided<[?, ?, ?, ?, 2], offset: ?>>) -> memref<6x1xi32, strided<[?, ?], offset: ?>> { @@ -1083,8 +1076,7 @@ func.func @simplify_collapse_with_dim_of_size1_and_resulting_dyn_stride // Stride 2 = origStride5 // = 1 // -// CHECK-DAG: #[[$SIZE0_MAP:.*]] = affine_map<()[s0, s1] -> ((s0 * s1) * 4)> -// CHECK-DAG: #[[$STRIDE0_MAP:.*]] = affine_map<()[s0] -> (s0)> +// CHECK: #[[$SIZE0_MAP:.*]] = affine_map<()[s0, s1] -> ((s0 * s1) * 4)> // CHECK-LABEL: func @extract_strided_metadata_of_collapse( // CHECK-SAME: %[[ARG:.*]]: memref) // @@ -1094,10 +1086,9 @@ func.func @simplify_collapse_with_dim_of_size1_and_resulting_dyn_stride // // CHECK-DAG: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:6, %[[STRIDES:.*]]:6 = memref.extract_strided_metadata %[[ARG]] : memref // -// CHECK-DAG: %[[DYN_STRIDE0:.*]] = affine.min #[[$STRIDE0_MAP]]()[%[[STRIDES]]#0] // CHECK-DAG: 
%[[DYN_SIZE1:.*]] = affine.apply #[[$SIZE0_MAP]]()[%[[SIZES]]#1, %[[SIZES]]#3] // -// CHECK: return %[[BASE]], %[[C0]], %[[SIZES]]#0, %[[DYN_SIZE1]], %[[C42]], %[[DYN_STRIDE0]], %[[C42]], %[[C1]] +// CHECK: return %[[BASE]], %[[C0]], %[[SIZES]]#0, %[[DYN_SIZE1]], %[[C42]], %[[STRIDES]]#0, %[[C42]], %[[C1]] func.func @extract_strided_metadata_of_collapse(%arg : memref) -> (memref, index, index, index, index, diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir index 0f533cb95a0ca93..51c4781c9022b25 100644 --- a/mlir/test/Dialect/MemRef/invalid.mlir +++ b/mlir/test/Dialect/MemRef/invalid.mlir @@ -217,6 +217,15 @@ func.func @memref_reinterpret_cast_no_map_but_offset(%in: memref) { // ----- +func.func @memref_reinterpret_cast_offset_mismatch_dynamic(%in: memref, %offset : index) { + // expected-error @+1 {{expected result type with offset = dynamic instead of 0}} + %out = memref.reinterpret_cast %in to offset: [%offset], sizes: [10], strides: [1] + : memref to memref<10xf32> + return +} + +// ----- + func.func @memref_reinterpret_cast_no_map_but_stride(%in: memref) { // expected-error @+1 {{expected result type with stride = 10 instead of 1 in dim = 0}} %out = memref.reinterpret_cast %in to offset: [0], sizes: [10], strides: [10] From a879073494e7989f670ab0432dd0047ffbf5d1cd Mon Sep 17 00:00:00 2001 From: lntue Date: Fri, 25 Oct 2024 22:11:07 -0400 Subject: [PATCH 074/425] [libc][math] Fix a bug in cbrt when the result is rounded to the next exponent. 
(#113749) --- libc/src/math/generic/cbrt.cpp | 4 ++-- libc/test/src/math/cbrt_test.cpp | 13 +++++++------ libc/test/src/math/smoke/cbrt_test.cpp | 2 ++ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/libc/src/math/generic/cbrt.cpp b/libc/src/math/generic/cbrt.cpp index 036664c2aafaf46..4fa24c54fdeecf0 100644 --- a/libc/src/math/generic/cbrt.cpp +++ b/libc/src/math/generic/cbrt.cpp @@ -235,10 +235,10 @@ LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { // Lambda function to update the exponent of the result. auto update_exponent = [=](double r) -> double { - uint64_t r_m = FPBits(r).uintval() & 0x800F'FFFF'FFFF'FFFF; + uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000; // Adjust exponent and sign. uint64_t r_bits = - r_m | (static_cast(out_e) << FPBits::FRACTION_LEN); + r_m + (static_cast(out_e) << FPBits::FRACTION_LEN); return FPBits(r_bits).get_val(); }; diff --git a/libc/test/src/math/cbrt_test.cpp b/libc/test/src/math/cbrt_test.cpp index 2ef2140966f52c5..2e2de16fc859d12 100644 --- a/libc/test/src/math/cbrt_test.cpp +++ b/libc/test/src/math/cbrt_test.cpp @@ -87,12 +87,13 @@ TEST_F(LlvmLibcCbrtTest, InDoubleRange) { TEST_F(LlvmLibcCbrtTest, SpecialValues) { constexpr double INPUTS[] = { - 0x1.4f61672324c8p-1028, 0x1.00152f57068b7p-1, 0x1.006509cda9886p-1, - 0x1.018369b92e523p-1, 0x1.10af932ef2bf9p-1, 0x1.1a41117939fdbp-1, - 0x1.2ae8076520d9ap-1, 0x1.a202bfc89ddffp-1, 0x1.a6bb8c803147bp-1, - 0x1.000197b499b1bp+0, 0x1.00065ed266c6cp+0, 0x1.d4306c202c4c2p+0, - 0x1.8fd409efe4851p+1, 0x1.95fd0eb31cc4p+1, 0x1.7cef1d276e335p+2, - 0x1.94910c4fc98p+2, 0x1.a0cc1327bb4c4p+2, 0x1.e7d6ebed549c4p+2, + 0x1.4f61672324c8p-1028, -0x1.fffffffffffffp-1021, 0x1.00152f57068b7p-1, + 0x1.006509cda9886p-1, 0x1.018369b92e523p-1, 0x1.10af932ef2bf9p-1, + 0x1.1a41117939fdbp-1, 0x1.2ae8076520d9ap-1, 0x1.a202bfc89ddffp-1, + 0x1.a6bb8c803147bp-1, 0x1.000197b499b1bp+0, 0x1.00065ed266c6cp+0, + 0x1.d4306c202c4c2p+0, 0x1.8fd409efe4851p+1, 0x1.95fd0eb31cc4p+1, + 
0x1.7cef1d276e335p+2, 0x1.94910c4fc98p+2, 0x1.a0cc1327bb4c4p+2, + 0x1.e7d6ebed549c4p+2, }; for (double v : INPUTS) { double x = FPBits(v).get_val(); diff --git a/libc/test/src/math/smoke/cbrt_test.cpp b/libc/test/src/math/smoke/cbrt_test.cpp index 724e0e979decc18..d57cdb20de27460 100644 --- a/libc/test/src/math/smoke/cbrt_test.cpp +++ b/libc/test/src/math/smoke/cbrt_test.cpp @@ -32,4 +32,6 @@ TEST_F(LlvmLibcCbrtTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p42, LIBC_NAMESPACE::cbrt(-0x1.0p126)); EXPECT_FP_EQ_ALL_ROUNDING(0x1.0p341, LIBC_NAMESPACE::cbrt(0x1.0p1023)); EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p341, LIBC_NAMESPACE::cbrt(-0x1.0p1023)); + EXPECT_FP_EQ(-0x1.0p-340, LIBC_NAMESPACE::cbrt(-0x1.fffffffffffffp-1021)); + EXPECT_FP_EQ(2.0, LIBC_NAMESPACE::cbrt(0x1.fffffffffffffp2)); } From a8ef0b33a6e5a35f341aec702fa2ceeed444f1fe Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 25 Oct 2024 19:11:38 -0700 Subject: [PATCH 075/425] [mlir] Fix build (#113750) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` mlir/lib/Transforms/Utils/DialectConversion.cpp:2851:28: error: call of overloaded ‘TypeRange(llvm::SmallVector&)’ is ambiguous assert(TypeRange(result) == resultTypes && ``` --- mlir/lib/Transforms/Utils/DialectConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 3d0c81867e0cc26..9f8a482d6e2d223 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -2848,7 +2848,7 @@ SmallVector TypeConverter::materializeTargetConversion( fn(builder, resultTypes, inputs, loc, originalType); if (result.empty()) continue; - assert(TypeRange(result) == resultTypes && + assert(TypeRange(ValueRange(result)) == resultTypes && "callback produced incorrect number of values or values with " "incorrect types"); return result; From 
801f3a5400ca2fbcfdeb73fd744e8cce0cebc722 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 25 Oct 2024 19:29:21 -0700 Subject: [PATCH 076/425] [clang-format] Print the names of unfound files in error messages (#113640) Also fix the return status when `-i` is used with reading from stdin. Fixes #113631. --- clang/test/Format/error-unfound-files.cpp | 5 +++++ clang/tools/clang-format/ClangFormat.cpp | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 clang/test/Format/error-unfound-files.cpp diff --git a/clang/test/Format/error-unfound-files.cpp b/clang/test/Format/error-unfound-files.cpp new file mode 100644 index 000000000000000..1cc57ed064fb429 --- /dev/null +++ b/clang/test/Format/error-unfound-files.cpp @@ -0,0 +1,5 @@ +// RUN: rm -f a.c b.c + +// RUN: not clang-format a.c b.c 2>&1 | FileCheck %s +// CHECK: a.c: +// CHECK-NEXT: b.c: diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index 5522d05744a2b4c..cc735e48725921c 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -410,7 +410,7 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) { const bool IsSTDIN = FileName == "-"; if (!OutputXML && Inplace && IsSTDIN) { errs() << "error: cannot use -i when reading from stdin.\n"; - return false; + return true; } // On Windows, overwriting a file with an open file mapping doesn't work, // so read the whole file into memory when formatting in-place. @@ -419,7 +419,7 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) { ? 
MemoryBuffer::getFileAsStream(FileName) : MemoryBuffer::getFileOrSTDIN(FileName, /*IsText=*/true); if (std::error_code EC = CodeOrErr.getError()) { - errs() << EC.message() << "\n"; + errs() << FileName << ": " << EC.message() << "\n"; return true; } std::unique_ptr Code = std::move(CodeOrErr.get()); From ec2da0ca19c393053c7f11d5478ae21c27e54f5c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 25 Oct 2024 19:37:55 -0700 Subject: [PATCH 077/425] [ADT] Use data() and size() within StringRef (NFC) (#113657) This patch uses data() and size() within StringRef instead of Data and Length. This makes it easier to replace Data and Length with std::string_view in the future, which in turn allows us to forward most of StringRef functions to the counterparts in std::string_view. --- llvm/include/llvm/ADT/StringRef.h | 74 +++++++++++++++--------------- llvm/lib/Support/StringRef.cpp | 76 ++++++++++++++++--------------- 2 files changed, 77 insertions(+), 73 deletions(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 17ab10b9181f1ab..d5f30b88c4c6a2a 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -102,7 +102,7 @@ namespace llvm { /// Construct a string ref from an std::string. /*implicit*/ StringRef(const std::string &Str) - : Data(Str.data()), Length(Str.length()) {} + : Data(Str.data()), Length(Str.length()) {} /// Construct a string ref from an std::string_view. /*implicit*/ constexpr StringRef(std::string_view Str) @@ -112,9 +112,9 @@ namespace llvm { /// @name Iterators /// @{ - iterator begin() const { return Data; } + iterator begin() const { return data(); } - iterator end() const { return Data + Length; } + iterator end() const { return data() + size(); } reverse_iterator rbegin() const { return std::make_reverse_iterator(end()); @@ -143,7 +143,7 @@ namespace llvm { [[nodiscard]] constexpr const char *data() const { return Data; } /// empty - Check if the string is empty. 
- [[nodiscard]] constexpr bool empty() const { return Length == 0; } + [[nodiscard]] constexpr bool empty() const { return size() == 0; } /// size - Get the string size. [[nodiscard]] constexpr size_t size() const { return Length; } @@ -151,13 +151,13 @@ namespace llvm { /// front - Get the first character in the string. [[nodiscard]] char front() const { assert(!empty()); - return Data[0]; + return data()[0]; } /// back - Get the last character in the string. [[nodiscard]] char back() const { assert(!empty()); - return Data[Length-1]; + return data()[size() - 1]; } // copy - Allocate copy in Allocator and return StringRef to it. @@ -166,14 +166,14 @@ namespace llvm { // Don't request a length 0 copy from the allocator. if (empty()) return StringRef(); - char *S = A.template Allocate(Length); + char *S = A.template Allocate(size()); std::copy(begin(), end(), S); - return StringRef(S, Length); + return StringRef(S, size()); } /// Check for string equality, ignoring case. [[nodiscard]] bool equals_insensitive(StringRef RHS) const { - return Length == RHS.Length && compare_insensitive(RHS) == 0; + return size() == RHS.size() && compare_insensitive(RHS) == 0; } /// compare - Compare two strings; the result is negative, zero, or positive @@ -181,13 +181,14 @@ namespace llvm { /// the \p RHS. [[nodiscard]] int compare(StringRef RHS) const { // Check the prefix for a mismatch. - if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) + if (int Res = + compareMemory(data(), RHS.data(), std::min(size(), RHS.size()))) return Res < 0 ? -1 : 1; // Otherwise the prefixes match, so we only need to check the lengths. - if (Length == RHS.Length) + if (size() == RHS.size()) return 0; - return Length < RHS.Length ? -1 : 1; + return size() < RHS.size() ? -1 : 1; } /// Compare two strings, ignoring case. @@ -225,8 +226,9 @@ namespace llvm { /// str - Get the contents as an std::string. 
[[nodiscard]] std::string str() const { - if (!Data) return std::string(); - return std::string(Data, Length); + if (!data()) + return std::string(); + return std::string(data(), size()); } /// @} @@ -234,8 +236,8 @@ namespace llvm { /// @{ [[nodiscard]] char operator[](size_t Index) const { - assert(Index < Length && "Invalid index!"); - return Data[Index]; + assert(Index < size() && "Invalid index!"); + return data()[Index]; } /// Disallow accidental assignment from a temporary std::string. @@ -260,8 +262,8 @@ namespace llvm { /// Check if this string starts with the given \p Prefix. [[nodiscard]] bool starts_with(StringRef Prefix) const { - return Length >= Prefix.Length && - compareMemory(Data, Prefix.Data, Prefix.Length) == 0; + return size() >= Prefix.size() && + compareMemory(data(), Prefix.data(), Prefix.size()) == 0; } [[nodiscard]] bool starts_with(char Prefix) const { return !empty() && front() == Prefix; @@ -272,9 +274,9 @@ namespace llvm { /// Check if this string ends with the given \p Suffix. [[nodiscard]] bool ends_with(StringRef Suffix) const { - return Length >= Suffix.Length && - compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == - 0; + return size() >= Suffix.size() && + compareMemory(end() - Suffix.size(), Suffix.data(), + Suffix.size()) == 0; } [[nodiscard]] bool ends_with(char Suffix) const { return !empty() && back() == Suffix; @@ -342,10 +344,10 @@ namespace llvm { /// \returns The index of the last occurrence of \p C, or npos if not /// found. [[nodiscard]] size_t rfind(char C, size_t From = npos) const { - size_t I = std::min(From, Length); + size_t I = std::min(From, size()); while (I) { --I; - if (Data[I] == C) + if (data()[I] == C) return I; } return npos; @@ -447,8 +449,8 @@ namespace llvm { /// Return the number of occurrences of \p C in the string. 
[[nodiscard]] size_t count(char C) const { size_t Count = 0; - for (size_t I = 0; I != Length; ++I) - if (Data[I] == C) + for (size_t I = 0; I != size(); ++I) + if (data()[I] == C) ++Count; return Count; } @@ -567,8 +569,8 @@ namespace llvm { /// suffix (starting with \p Start) will be returned. [[nodiscard]] constexpr StringRef substr(size_t Start, size_t N = npos) const { - Start = std::min(Start, Length); - return StringRef(Data + Start, std::min(N, Length - Start)); + Start = std::min(Start, size()); + return StringRef(data() + Start, std::min(N, size() - Start)); } /// Return a StringRef equal to 'this' but with only the first \p N @@ -679,9 +681,9 @@ namespace llvm { /// will be returned. If this is less than \p Start, an empty string will /// be returned. [[nodiscard]] StringRef slice(size_t Start, size_t End) const { - Start = std::min(Start, Length); - End = std::clamp(End, Start, Length); - return StringRef(Data + Start, End - Start); + Start = std::min(Start, size()); + End = std::clamp(End, Start, size()); + return StringRef(data() + Start, End - Start); } /// Split into two substrings around the first occurrence of a separator @@ -786,25 +788,25 @@ namespace llvm { /// Return string with consecutive \p Char characters starting from the /// the left removed. [[nodiscard]] StringRef ltrim(char Char) const { - return drop_front(std::min(Length, find_first_not_of(Char))); + return drop_front(std::min(size(), find_first_not_of(Char))); } /// Return string with consecutive characters in \p Chars starting from /// the left removed. [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const { - return drop_front(std::min(Length, find_first_not_of(Chars))); + return drop_front(std::min(size(), find_first_not_of(Chars))); } /// Return string with consecutive \p Char characters starting from the /// right removed. 
[[nodiscard]] StringRef rtrim(char Char) const { - return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1)); + return drop_back(size() - std::min(size(), find_last_not_of(Char) + 1)); } /// Return string with consecutive characters in \p Chars starting from /// the right removed. [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const { - return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1)); + return drop_back(size() - std::min(size(), find_last_not_of(Chars) + 1)); } /// Return string with consecutive \p Char characters starting from the @@ -831,9 +833,9 @@ namespace llvm { // If there is no carriage return, assume unix return "\n"; } - if (Pos + 1 < Length && Data[Pos + 1] == '\n') + if (Pos + 1 < size() && data()[Pos + 1] == '\n') return "\r\n"; // Windows - if (Pos > 0 && Data[Pos - 1] == '\n') + if (Pos > 0 && data()[Pos - 1] == '\n') return "\n\r"; // You monster! return "\r"; // Classic Mac } diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp index 4bbe41688209621..4f5fcb4857e8056 100644 --- a/llvm/lib/Support/StringRef.cpp +++ b/llvm/lib/Support/StringRef.cpp @@ -35,21 +35,23 @@ static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { } int StringRef::compare_insensitive(StringRef RHS) const { - if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length))) + if (int Res = + ascii_strncasecmp(data(), RHS.data(), std::min(size(), RHS.size()))) return Res; - if (Length == RHS.Length) + if (size() == RHS.size()) return 0; - return Length < RHS.Length ? -1 : 1; + return size() < RHS.size() ? 
-1 : 1; } bool StringRef::starts_with_insensitive(StringRef Prefix) const { - return Length >= Prefix.Length && - ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0; + return size() >= Prefix.size() && + ascii_strncasecmp(data(), Prefix.data(), Prefix.size()) == 0; } bool StringRef::ends_with_insensitive(StringRef Suffix) const { - return Length >= Suffix.Length && - ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; + return size() >= Suffix.size() && + ascii_strncasecmp(end() - Suffix.size(), Suffix.data(), + Suffix.size()) == 0; } size_t StringRef::find_insensitive(char C, size_t From) const { @@ -59,33 +61,33 @@ size_t StringRef::find_insensitive(char C, size_t From) const { /// compare_numeric - Compare strings, handle embedded numbers. int StringRef::compare_numeric(StringRef RHS) const { - for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) { + for (size_t I = 0, E = std::min(size(), RHS.size()); I != E; ++I) { // Check for sequences of digits. - if (isDigit(Data[I]) && isDigit(RHS.Data[I])) { + if (isDigit(data()[I]) && isDigit(RHS.data()[I])) { // The longer sequence of numbers is considered larger. // This doesn't really handle prefixed zeros well. size_t J; for (J = I + 1; J != E + 1; ++J) { - bool ld = J < Length && isDigit(Data[J]); - bool rd = J < RHS.Length && isDigit(RHS.Data[J]); + bool ld = J < size() && isDigit(data()[J]); + bool rd = J < RHS.size() && isDigit(RHS.data()[J]); if (ld != rd) return rd ? -1 : 1; if (!rd) break; } // The two number sequences have the same length (J-I), just memcmp them. - if (int Res = compareMemory(Data + I, RHS.Data + I, J - I)) + if (int Res = compareMemory(data() + I, RHS.data() + I, J - I)) return Res < 0 ? -1 : 1; // Identical number sequences, continue search after the numbers. I = J - 1; continue; } - if (Data[I] != RHS.Data[I]) - return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? 
-1 : 1; + if (data()[I] != RHS.data()[I]) + return (unsigned char)data()[I] < (unsigned char)RHS.data()[I] ? -1 : 1; } - if (Length == RHS.Length) + if (size() == RHS.size()) return 0; - return Length < RHS.Length ? -1 : 1; + return size() < RHS.size() ? -1 : 1; } // Compute the edit distance between the two given strings. @@ -128,11 +130,11 @@ std::string StringRef::upper() const { /// \return - The index of the first occurrence of \arg Str, or npos if not /// found. size_t StringRef::find(StringRef Str, size_t From) const { - if (From > Length) + if (From > size()) return npos; - const char *Start = Data + From; - size_t Size = Length - From; + const char *Start = data() + From; + size_t Size = size() - From; const char *Needle = Str.data(); size_t N = Str.size(); @@ -142,7 +144,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { return npos; if (N == 1) { const char *Ptr = (const char *)::memchr(Start, Needle[0], Size); - return Ptr == nullptr ? npos : Ptr - Data; + return Ptr == nullptr ? npos : Ptr - data(); } const char *Stop = Start + (Size - N + 1); @@ -153,7 +155,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { // good enough. do { if (std::memcmp(Start, Needle, 2) == 0) - return Start - Data; + return Start - data(); ++Start; } while (Start < Stop); return npos; @@ -163,7 +165,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { if (Size < 16 || N > 255) { do { if (std::memcmp(Start, Needle, N) == 0) - return Start - Data; + return Start - data(); ++Start; } while (Start < Stop); return npos; @@ -179,7 +181,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { uint8_t Last = Start[N - 1]; if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1])) if (std::memcmp(Start, Needle, N - 1) == 0) - return Start - Data; + return Start - data(); // Otherwise skip the appropriate number of bytes. 
Start += BadCharSkip[Last]; @@ -200,11 +202,11 @@ size_t StringRef::find_insensitive(StringRef Str, size_t From) const { } size_t StringRef::rfind_insensitive(char C, size_t From) const { - From = std::min(From, Length); + From = std::min(From, size()); size_t i = From; while (i != 0) { --i; - if (toLower(Data[i]) == toLower(C)) + if (toLower(data()[i]) == toLower(C)) return i; } return npos; @@ -220,9 +222,9 @@ size_t StringRef::rfind(StringRef Str) const { size_t StringRef::rfind_insensitive(StringRef Str) const { size_t N = Str.size(); - if (N > Length) + if (N > size()) return npos; - for (size_t i = Length - N + 1, e = 0; i != e;) { + for (size_t i = size() - N + 1, e = 0; i != e;) { --i; if (substr(i, N).equals_insensitive(Str)) return i; @@ -240,8 +242,8 @@ StringRef::size_type StringRef::find_first_of(StringRef Chars, for (char C : Chars) CharBits.set((unsigned char)C); - for (size_type i = std::min(From, Length), e = Length; i != e; ++i) - if (CharBits.test((unsigned char)Data[i])) + for (size_type i = std::min(From, size()), e = size(); i != e; ++i) + if (CharBits.test((unsigned char)data()[i])) return i; return npos; } @@ -262,8 +264,8 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars, for (char C : Chars) CharBits.set((unsigned char)C); - for (size_type i = std::min(From, Length), e = Length; i != e; ++i) - if (!CharBits.test((unsigned char)Data[i])) + for (size_type i = std::min(From, size()), e = size(); i != e; ++i) + if (!CharBits.test((unsigned char)data()[i])) return i; return npos; } @@ -278,8 +280,8 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars, for (char C : Chars) CharBits.set((unsigned char)C); - for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) - if (CharBits.test((unsigned char)Data[i])) + for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i) + if (CharBits.test((unsigned char)data()[i])) return i; return npos; } @@ -287,8 +289,8 @@ StringRef::size_type 
StringRef::find_last_of(StringRef Chars, /// find_last_not_of - Find the last character in the string that is not /// \arg C, or npos if not found. StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const { - for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) - if (Data[i] != C) + for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i) + if (data()[i] != C) return i; return npos; } @@ -303,8 +305,8 @@ StringRef::size_type StringRef::find_last_not_of(StringRef Chars, for (char C : Chars) CharBits.set((unsigned char)C); - for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i) - if (!CharBits.test((unsigned char)Data[i])) + for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i) + if (!CharBits.test((unsigned char)data()[i])) return i; return npos; } From 8f9fc6ce4713b209e6addeb313eb4432fe374c00 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Sat, 26 Oct 2024 11:22:08 +0800 Subject: [PATCH 078/425] [mlir][GPU] Add FunctionOpInterface check for `OpToFuncCallLowering` (#113449) This PR adds a `FunctionOpInterface` check in `OpToFuncCallLowering` to resolve a crash when ops not in function. Fixes #113334. 
--- .../Conversion/GPUCommon/OpToFuncCallLowering.h | 5 +++++ .../Conversion/MathToROCDL/math-to-rocdl.mlir | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h index 1cf8a1acb319358..3b94abd88f9ed22 100644 --- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h +++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h @@ -61,6 +61,11 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { SourceOp>::value, "expected op with same operand and result types"); + if (!op->template getParentOfType()) { + return rewriter.notifyMatchFailure( + op, "expected op to be within a function region"); + } + SmallVector castedOperands; for (Value operand : adaptor.getOperands()) castedOperands.push_back(maybeCast(operand, rewriter)); diff --git a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir index ddd96bf797e6e71..e0ea18d41f66dae 100644 --- a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir +++ b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -convert-math-to-rocdl -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-math-to-rocdl -allow-unregistered-dialect -split-input-file | FileCheck %s module @test_module { // CHECK: llvm.func @__ocml_fmod_f16(f16, f16) -> f16 @@ -481,3 +481,17 @@ module @test_module { func.return %resultf16, %resultf32, %resultf64, %resultbf16 : f16, f32, f64, bf16 } } + +// ----- + +// Math operation not inside function +// Ensure it does not crash + +module { + "test.some_op_with_region"() ({ + ^bb0(%arg0: f64): + // CHECK: math.atan + %0 = math.atan %arg0 : f64 + "test.possible_terminator"() : () -> () + }) : () -> () +} From 5aa741d7ca21645572103d798d0b121f1a741a35 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Sat, 26 Oct 2024 11:22:57 +0800 Subject: [PATCH 079/425] [mlir][SPIRVToLLVM] Erase
empty `spirv.mlir.loop` in `LoopPattern` (#113527) This PR erases `spirv.mlir.loop` with an empty region in `LoopPattern`, resolving a crash. Fixes #113404. --- mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp | 6 ++++++ .../Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp index f28473a108e1b54..87c0936cee229ec 100644 --- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp +++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp @@ -1148,6 +1148,12 @@ class LoopPattern : public SPIRVToLLVMConversion { if (loopOp.getLoopControl() != spirv::LoopControl::None) return failure(); + // `spirv.mlir.loop` with empty region is redundant and should be erased. + if (loopOp.getBody().empty()) { + rewriter.eraseOp(loopOp); + return success(); + } + Location loc = loopOp.getLoc(); // Split the current block after `spirv.mlir.loop`. The remaining ops will diff --git a/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir index 3557830e779e240..756fc5415e20f7c 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir @@ -86,6 +86,14 @@ spirv.module Logical GLSL450 { //===----------------------------------------------------------------------===// spirv.module Logical GLSL450 { + // CHECK-LABEL: @empty_loop + spirv.func @empty_loop() "None" { + // CHECK: llvm.return + spirv.mlir.loop { + } + spirv.Return + } + // CHECK-LABEL: @infinite_loop spirv.func @infinite_loop(%count : i32) -> () "None" { // CHECK: llvm.br ^[[BB1:.*]] From bb00f5b1edd0ed77b7e7b0113dad223505564b18 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 25 Oct 2024 20:45:44 -0700 Subject: [PATCH 080/425] [mlir][vector] Remove unneeded mask restriction (#113742) These were added when the only mapping 
was to LLVM. --- .../mlir/Dialect/Vector/IR/VectorOps.td | 52 +++++++++++-------- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 8 +-- mlir/test/Dialect/Vector/invalid.mlir | 4 +- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index c02b16ea931706d..e859270cf9a5e5c 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -1819,17 +1819,17 @@ def Vector_MaskedLoadOp : Vector_Op<"maskedload">, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOfRankAndType<[1], [I1]>:$mask, - VectorOfRank<[1]>:$pass_thru)>, - Results<(outs VectorOfRank<[1]>:$result)> { + VectorOf<[I1]>:$mask, + AnyVector:$pass_thru)>, + Results<(outs AnyVector:$result)> { let summary = "loads elements from memory into a vector as defined by a mask vector"; let description = [{ - The masked load reads elements from memory into a 1-D vector as defined - by a base with indices and a 1-D mask vector. When the mask is set, the + The masked load reads elements from memory into a vector as defined + by a base with indices and a mask vector. When the mask is set, the element is read from memory. Otherwise, the corresponding element is taken - from a 1-D pass-through vector. Informally the semantics are: + from a pass-through vector. 
Informally the semantics are: ``` result[0] := if mask[0] then base[i + 0] else pass_thru[0] result[1] := if mask[1] then base[i + 1] else pass_thru[1] @@ -1882,14 +1882,14 @@ def Vector_MaskedStoreOp : Vector_Op<"maskedstore">, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOfRankAndType<[1], [I1]>:$mask, - VectorOfRank<[1]>:$valueToStore)> { + VectorOf<[I1]>:$mask, + AnyVector:$valueToStore)> { let summary = "stores elements from a vector into memory as defined by a mask vector"; let description = [{ - The masked store operation writes elements from a 1-D vector into memory - as defined by a base with indices and a 1-D mask vector. When the mask is + The masked store operation writes elements from a vector into memory + as defined by a base with indices and a mask vector. When the mask is set, the corresponding element from the vector is written to memory. Otherwise, no action is taken for the element. Informally the semantics are: ``` @@ -2076,23 +2076,26 @@ def Vector_ExpandLoadOp : Vector_Op<"expandload">, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOfRankAndType<[1], [I1]>:$mask, - VectorOfRank<[1]>:$pass_thru)>, - Results<(outs VectorOfRank<[1]>:$result)> { + VectorOf<[I1]>:$mask, + AnyVector:$pass_thru)>, + Results<(outs AnyVector:$result)> { let summary = "reads elements from memory and spreads them into a vector as defined by a mask"; let description = [{ - The expand load reads elements from memory into a 1-D vector as defined - by a base with indices and a 1-D mask vector. When the mask is set, the - next element is read from memory. Otherwise, the corresponding element - is taken from a 1-D pass-through vector. Informally the semantics are: + The expand load reads elements from memory into a vector as defined by a + base with indices and a mask vector. Expansion only applies to the innermost + dimension. When the mask is set, the next element is read from memory. + Otherwise, the corresponding element is taken from a pass-through vector. 
+ Informally the semantics are: + ``` index = i result[0] := if mask[0] then base[index++] else pass_thru[0] result[1] := if mask[1] then base[index++] else pass_thru[1] etc. ``` + Note that the index increment is done conditionally. If a mask bit is set and the corresponding index is out-of-bounds for the @@ -2140,22 +2143,25 @@ def Vector_CompressStoreOp : Vector_Op<"compressstore">, Arguments<(ins Arg:$base, Variadic:$indices, - VectorOfRankAndType<[1], [I1]>:$mask, - VectorOfRank<[1]>:$valueToStore)> { + VectorOf<[I1]>:$mask, + AnyVector:$valueToStore)> { let summary = "writes elements selectively from a vector as defined by a mask"; let description = [{ - The compress store operation writes elements from a 1-D vector into memory - as defined by a base with indices and a 1-D mask vector. When the mask is - set, the corresponding element from the vector is written next to memory. - Otherwise, no action is taken for the element. Informally the semantics are: + The compress store operation writes elements from a vector into memory as + defined by a base with indices and a mask vector. Compression only applies + to the innermost dimension. When the mask is set, the corresponding element + from the vector is written next to memory. Otherwise, no action is taken + for the element. Informally the semantics are: + ``` index = i if (mask[0]) base[index++] = value[0] if (mask[1]) base[index++] = value[1] etc. ``` + Note that the index increment is done conditionally. 
If a mask bit is set and the corresponding index is out-of-bounds for the diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index a2abe1619454f26..d71a236f62f454d 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -4977,8 +4977,8 @@ LogicalResult MaskedLoadOp::verify() { return emitOpError("base and result element type should match"); if (llvm::size(getIndices()) != memType.getRank()) return emitOpError("requires ") << memType.getRank() << " indices"; - if (resVType.getDimSize(0) != maskVType.getDimSize(0)) - return emitOpError("expected result dim to match mask dim"); + if (resVType.getShape() != maskVType.getShape()) + return emitOpError("expected result shape to match mask shape"); if (resVType != passVType) return emitOpError("expected pass_thru of same type as result type"); return success(); @@ -5030,8 +5030,8 @@ LogicalResult MaskedStoreOp::verify() { return emitOpError("base and valueToStore element type should match"); if (llvm::size(getIndices()) != memType.getRank()) return emitOpError("requires ") << memType.getRank() << " indices"; - if (valueVType.getDimSize(0) != maskVType.getDimSize(0)) - return emitOpError("expected valueToStore dim to match mask dim"); + if (valueVType.getShape() != maskVType.getShape()) + return emitOpError("expected valueToStore shape to match mask shape"); return success(); } diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 36d04bb77e3b969..5b0fb537b35655b 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -1356,7 +1356,7 @@ func.func @maskedload_base_type_mismatch(%base: memref, %mask: vector<16x func.func @maskedload_dim_mask_mismatch(%base: memref, %mask: vector<15xi1>, %pass: vector<16xf32>) { %c0 = arith.constant 0 : index - // expected-error@+1 {{'vector.maskedload' op expected result dim to match mask dim}} + // expected-error@+1 
{{'vector.maskedload' op expected result shape to match mask shape}} %0 = vector.maskedload %base[%c0], %mask, %pass : memref, vector<15xi1>, vector<16xf32> into vector<16xf32> } @@ -1387,7 +1387,7 @@ func.func @maskedstore_base_type_mismatch(%base: memref, %mask: vector<16 func.func @maskedstore_dim_mask_mismatch(%base: memref, %mask: vector<15xi1>, %value: vector<16xf32>) { %c0 = arith.constant 0 : index - // expected-error@+1 {{'vector.maskedstore' op expected valueToStore dim to match mask dim}} + // expected-error@+1 {{'vector.maskedstore' op expected valueToStore shape to match mask shape}} vector.maskedstore %base[%c0], %mask, %value : memref, vector<15xi1>, vector<16xf32> } From 13d6233e77982f2a596922a79365373e1466a968 Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Sat, 26 Oct 2024 11:15:50 +0530 Subject: [PATCH 081/425] [MLIR][NVGPU] Fix nvgpu_arrive syntax in matmulBuilder.py (#113713) This patch updates the syntax for nvgpu_arrive Op in matmulBuilder.py. This fixes the compilation error for this test. For the warp-specialized matmul_kernel implementation, removing the WaitGroupSyncOp (after the mma-main-loop) fixes the hang observed. With these two fixes, the test compiles and executes successfully on an sm90a machine. 
Signed-off-by: Durgadoss R --- .../GPU/CUDA/sm90/python/tools/matmulBuilder.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py b/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py index 75f0dc947e06817..5394d4a3272555d 100644 --- a/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py +++ b/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py @@ -568,9 +568,7 @@ def generate_matmul_ws( barId, predicate=consumerPrimaryThread, ) - nvgpu.mbarrier_arrive( - ir.Type.parse("!nvgpu.mbarrier.token"), mbarDONE, barId - ) + nvgpu.mbarrier_arrive(mbarDONE, barId) debug_print( "[cons] iv={} | mbarDONE[{}] arrive [done]", iv, @@ -589,14 +587,9 @@ def generate_matmul_ws( # Step 6.3.5. Yield scf.yield_([new_acc, phaseParity]) - # Step 6.3. Wait All WGMMA - nvvm.WgmmaWaitGroupSyncOp(0) - with ir.InsertionPoint(scf.IfOp(consumerPrimaryThread).then_block): barId = c((K // BLOCK_K) % num_stages) - nvgpu.mbarrier_arrive( - ir.Type.parse("!nvgpu.mbarrier.token"), mbarDONE, barId - ) + nvgpu.mbarrier_arrive(mbarDONE, barId) scf.yield_([]) # Step 6.4. Epilogue (registers --> shared memory) From 3fc0d94ce57de2d0841e77c8fda7feef2923c4e0 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Sat, 26 Oct 2024 13:46:59 +0800 Subject: [PATCH 082/425] [libc++][test] Make macro detection more friendly to MSVC (#113633) MSVC STL's test suite is a bit nervous about replacing non-macro-defined identifiers with `0` (see also https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-4-c4668?view=msvc-170). On MSVC (and MS-compatible mode of other compilers), `long double` has the same format (IEEE-754 binary64) as `double`, so it should be OK to define `TEST_LONG_DOUBLE_IS_DOUBLE` when `_MSC_VER` is defined. Such detection should be performed first. 
--- libcxx/test/support/test_macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h index 5ef14e54dae237c..1b6473b623c53b9 100644 --- a/libcxx/test/support/test_macros.h +++ b/libcxx/test/support/test_macros.h @@ -511,7 +511,7 @@ inline Tp const& DoNotOptimize(Tp const& value) { # define TEST_CONSTEXPR_OPERATOR_NEW #endif -#if __SIZEOF_LONG_DOUBLE__ == __SIZEOF_DOUBLE__ +#if defined(_MSC_VER) || __SIZEOF_LONG_DOUBLE__ == __SIZEOF_DOUBLE__ # define TEST_LONG_DOUBLE_IS_DOUBLE #endif From 7c9cf0c6f09115c2d948f16946aa3b36bf483f34 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sat, 26 Oct 2024 08:15:34 +0000 Subject: [PATCH 083/425] [SHT_LLVM_BB_ADDR_MAP][AsmPrinter] Emit error on bad option combinations This patch makes it so that specifying all or none for -pgo-analysis-map along with an explicit option causes an error as this set of options does not really make sense. --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 10 ++++++++++ .../X86/basic-block-address-map-pgo-features.ll | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 2d444f2f970ac16..4ea71c9bd4ad4c0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1371,6 +1371,16 @@ static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) { static llvm::object::BBAddrMap::Features getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) { + // Ensure that the user has not passed in additional options while also + // specifying all or none.
+ if ((PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::None) || + PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::All)) && + popcount(PgoAnalysisMapFeatures.getBits()) != 1) { + MF.getFunction().getContext().emitError( + "-pgo-anaylsis-map can accept only all or none with no additional " + "values."); + } + bool NoFeatures = PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::None); bool AllFeatures = PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::All); bool FuncEntryCountEnabled = diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll index 1c3db738a94768b..fca5aa046b03b95 100644 --- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll +++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll @@ -11,6 +11,10 @@ ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY +;; Verify that we emit an error if we try and specify values in addition to all/none +; RUN: not llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=all,bb-freq +; RUN: not llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=none,bb-freq + define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 !prof !0 { br i1 %0, label %3, label %8, !prof !1 From ef9629c64ac05d426436a0b9fd1ae9a058805368 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sat, 26 Oct 2024 12:34:45 +0300 Subject: [PATCH 084/425] Revert "Add extendhfxf2 into compiler rt (#111099)" This reverts commit 5f7bad07b9d5b6c5cfa8c16a4e62cf1e128725be. These tests fail to build in multiple configurations, see https://github.com/llvm/llvm-project/pull/111099. 
--- compiler-rt/lib/builtins/CMakeLists.txt | 1 - compiler-rt/lib/builtins/extendhfxf2.c | 18 ----- .../lib/builtins/macho_embedded/common.txt | 1 - .../test/builtins/Unit/extendhfxf2_test.c | 71 ------------------- .../compiler-rt/lib/builtins/BUILD.gn | 1 - 5 files changed, 92 deletions(-) delete mode 100644 compiler-rt/lib/builtins/extendhfxf2.c delete mode 100644 compiler-rt/test/builtins/Unit/extendhfxf2_test.c diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 97a9e508d37a325..9a0a50ee7003f19 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -104,7 +104,6 @@ set(GENERIC_SOURCES divti3.c extendsfdf2.c extendhfsf2.c - extendhfxf2.c ffsdi2.c ffssi2.c ffsti2.c diff --git a/compiler-rt/lib/builtins/extendhfxf2.c b/compiler-rt/lib/builtins/extendhfxf2.c deleted file mode 100644 index 7425859f79f763a..000000000000000 --- a/compiler-rt/lib/builtins/extendhfxf2.c +++ /dev/null @@ -1,18 +0,0 @@ -//===-- lib/extendhfxf2.c - half -> long double conversion --------*- C -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#define SRC_HALF -#define DST_DOUBLE -#include "fp_extend_impl.inc" - -// Use a forwarding definition and noinline to implement a poor man's alias, -// as there isn't a good cross-platform way of defining one. -// Long double are expected to be as precise as double. 
-COMPILER_RT_ABI NOINLINE long double __extendhfxf2(src_t a) { - return (long double)__extendXfYf2__(a); -} diff --git a/compiler-rt/lib/builtins/macho_embedded/common.txt b/compiler-rt/lib/builtins/macho_embedded/common.txt index fa99bc239e68f28..819109768f52989 100644 --- a/compiler-rt/lib/builtins/macho_embedded/common.txt +++ b/compiler-rt/lib/builtins/macho_embedded/common.txt @@ -60,7 +60,6 @@ divsf3 divsi3 extendsfdf2 extendhfsf2 -extendhfxf2 ffssi2 fixdfsi fixsfsi diff --git a/compiler-rt/test/builtins/Unit/extendhfxf2_test.c b/compiler-rt/test/builtins/Unit/extendhfxf2_test.c deleted file mode 100644 index 9972b024ab415e3..000000000000000 --- a/compiler-rt/test/builtins/Unit/extendhfxf2_test.c +++ /dev/null @@ -1,71 +0,0 @@ -// RUN: %clang_builtins %s %librt -o %t && %run %t -// REQUIRES: librt_has_extendhfxf2 - -#include -#include // for isnan, isinf -#include - -#if __LDBL_MANT_DIG__ >= 64 && defined(COMPILER_RT_HAS_FLOAT16) - -long double __extendhfxf2(_Float16 f); - -int test_extendhfxf2(_Float16 a, long double expected) { - long double x = __extendhfxf2(a); - __uint16_t *b = (void *)&a; - int ret = !((isnan(x) && isnan(expected)) || x == expected); - if (ret) { - printf("error in test__extendhfxf2(%#.4x) = %.20Lf, " - "expected %.20Lf\n", - *b, x, expected); - } - return ret; -} - -char assumption_1[sizeof(_Float16) * CHAR_BIT == 16] = {0}; - -int main() { - // Small positive value - if (test_extendhfxf2(0.09997558593750000000f, 0.09997558593750000000L)) - return 1; - - // Small negative value - if (test_extendhfxf2(-0.09997558593750000000f, -0.09997558593750000000L)) - return 1; - - // Zero - if (test_extendhfxf2(0.0f, 0.0L)) - return 1; - - // Smallest positive non-zero value - if (test_extendhfxf2(0x1p-16f, 0x1p-16L)) - return 1; - - // Smallest negative non-zero value - if (test_extendhfxf2(-0x1p-16f, -0x1p-16L)) - return 1; - - // Positive infinity - if (test_extendhfxf2(__builtin_huge_valf16(), __builtin_huge_valf64x())) - return 1; - - // 
Negative infinity - if (test_extendhfxf2(-__builtin_huge_valf16(), - (long double)-__builtin_huge_valf64x())) - return 1; - - // NaN - if (test_extendhfxf2(__builtin_nanf16(""), - (long double)__builtin_nanf64x(""))) - return 1; - - return 0; -} - -#else - -int main() { - printf("skipped\n"); - return 0; -} - -#endif diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn index efbf01960bf907f..8904aed28229f15 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -126,7 +126,6 @@ static_library("builtins") { "divsi3.c", "divti3.c", "extendhfsf2.c", - "extendhfxf2.c" "extendsfdf2.c", "ffsdi2.c", "ffssi2.c", From 69ead949d08ff0bb8cbbf4f7143aaa6687830f6b Mon Sep 17 00:00:00 2001 From: Thomas Fransham Date: Sat, 26 Oct 2024 11:15:37 +0100 Subject: [PATCH 085/425] [llvm] Enable building Analysis plugins on windows (#112303) Enable building InlineAdvisorPlugin and InlineOrderPlugin on Windows for shared library builds. This is part of the work to enable LLVM_BUILD_LLVM_DYLIB and LLVM plugins on Windows. --- llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt | 2 +- llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt b/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt index 22cd0cb1d9a034d..deabf110f2e4399 100644 --- a/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt +++ b/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt @@ -2,7 +2,7 @@ # libraries, but expects them to exist in the process loading the plugin. This # doesn't work with DLLs on Windows (where a shared library can't have undefined # references), so just skip this testcase on Windows. 
-if (NOT WIN32 AND NOT CYGWIN) +if ((NOT WIN32 OR LLVM_BUILD_LLVM_DYLIB) AND NOT CYGWIN) unset(LLVM_LINK_COMPONENTS) add_llvm_library(InlineAdvisorPlugin MODULE BUILDTREE_ONLY InlineAdvisorPlugin.cpp diff --git a/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt b/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt index cc470a934426d54..0b37cebe3da6de8 100644 --- a/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt +++ b/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt @@ -2,7 +2,7 @@ # libraries, but expects them to exist in the process loading the plugin. This # doesn't work with DLLs on Windows (where a shared library can't have undefined # references), so just skip this testcase on Windows. -if (NOT WIN32 AND NOT CYGWIN) +if ((NOT WIN32 OR LLVM_BUILD_LLVM_DYLIB) AND NOT CYGWIN) unset(LLVM_LINK_COMPONENTS) add_llvm_library(InlineOrderPlugin MODULE BUILDTREE_ONLY InlineOrderPlugin.cpp From 4102625380823e58d7b13f01b5bd979a29bce19e Mon Sep 17 00:00:00 2001 From: davidtrevelyan Date: Sat, 26 Oct 2024 13:06:11 +0100 Subject: [PATCH 086/425] [rtsan][llvm][NFC] Rename sanitize_realtime_unsafe attr to sanitize_realtime_blocking (#113155) # What This PR renames the newly-introduced llvm attribute `sanitize_realtime_unsafe` to `sanitize_realtime_blocking`. Likewise, sibling variables such as `SanitizeRealtimeUnsafe` are renamed to `SanitizeRealtimeBlocking` respectively. There are no other functional changes. # Why? - There are a number of problems that can cause a function to be real-time "unsafe", - we wish to communicate what problems rtsan detects and *why* they're unsafe, and - a generic "unsafe" attribute is, in our opinion, too broad a net - which may lead to future implementations that need extra contextual information passed through them in order to communicate meaningful reasons to users. 
- We want to avoid this situation and make the runtime library boundary API/ABI as simple as possible, and - we believe that restricting the scope of attributes to names like `sanitize_realtime_blocking` is an effective means of doing so. We also feel that the symmetry between `[[clang::blocking]]` and `sanitize_realtime_blocking` is easier to follow as a developer. # Concerns - I'm aware that the LLVM attribute `sanitize_realtime_unsafe` has been part of the tree for a few weeks now (introduced here: https://github.com/llvm/llvm-project/pull/106754). Given that it hasn't been released in version 20 yet, am I correct in considering this to not be a breaking change? --- clang/lib/CodeGen/CodeGenFunction.cpp | 2 +- clang/test/CodeGen/rtsan_attribute_inserted.c | 2 +- clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c | 2 +- compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp | 4 ++-- llvm/docs/LangRef.rst | 2 +- llvm/include/llvm/Bitcode/LLVMBitCodes.h | 2 +- llvm/include/llvm/IR/Attributes.td | 4 ++-- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 4 ++-- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 4 ++-- llvm/lib/IR/Verifier.cpp | 4 ++-- llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp | 6 +++--- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 2 +- llvm/test/Bitcode/attributes.ll | 4 ++-- llvm/test/Bitcode/compatibility.ll | 6 +++--- .../{rtsan_unsafe.ll => rtsan_blocking.ll} | 4 ++-- llvm/test/Verifier/rtsan-attrs.ll | 4 ++-- 16 files changed, 28 insertions(+), 28 deletions(-) rename llvm/test/Instrumentation/RealtimeSanitizer/{rtsan_unsafe.ll => rtsan_blocking.ll} (87%) diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 573ced0857d5f5f..6ead45793742d6c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -852,7 +852,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (Fe.Effect.kind() == FunctionEffect::Kind::NonBlocking) 
Fn->addFnAttr(llvm::Attribute::SanitizeRealtime); else if (Fe.Effect.kind() == FunctionEffect::Kind::Blocking) - Fn->addFnAttr(llvm::Attribute::SanitizeRealtimeUnsafe); + Fn->addFnAttr(llvm::Attribute::SanitizeRealtimeBlocking); } // Apply fuzzing attribute to the function. diff --git a/clang/test/CodeGen/rtsan_attribute_inserted.c b/clang/test/CodeGen/rtsan_attribute_inserted.c index b21ecb6b6b06a90..cebfe43c81234cf 100644 --- a/clang/test/CodeGen/rtsan_attribute_inserted.c +++ b/clang/test/CodeGen/rtsan_attribute_inserted.c @@ -8,4 +8,4 @@ float process(float *a) [[clang::nonblocking]] { return *a; } int spinlock(int *a) [[clang::blocking]] { return *a; } // CHECK: @spinlock{{.*}} #1 { // CHECK: attributes #1 = { -// CHECK-SAME: {{.*sanitize_realtime_unsafe .*}} +// CHECK-SAME: {{.*sanitize_realtime_blocking .*}} diff --git a/clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c b/clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c index 0f43007c5e4c161..86305080c94acee 100644 --- a/clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c +++ b/clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c @@ -5,4 +5,4 @@ int spinlock(int *a) [[clang::blocking]] { return *a; } // Without the -fsanitize=realtime flag, we shouldn't attach the attributes. 
// CHECK-NOT: {{.*sanitize_realtime .*}} -// CHECK-NOT: {{.*sanitize_realtime_unsafe .*}} +// CHECK-NOT: {{.*sanitize_realtime_blocking .*}} diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp index 9e455f0326a549e..ed9ee4ded7b0598 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp @@ -204,11 +204,11 @@ TEST(TestRtsan, ThrowingAnExceptionDiesWhenRealtime) { TEST(TestRtsan, DoesNotDieIfTurnedOff) { std::mutex mutex; - auto RealtimeUnsafeFunc = [&]() { + auto RealtimeBlockingFunc = [&]() { __rtsan_disable(); mutex.lock(); mutex.unlock(); __rtsan_enable(); }; - RealtimeInvoke(RealtimeUnsafeFunc); + RealtimeInvoke(RealtimeBlockingFunc); } diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index b83675c6ed97aa8..f9ec33da1b651b3 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2334,7 +2334,7 @@ example: This attribute indicates that RealtimeSanitizer checks (realtime safety analysis - no allocations, syscalls or exceptions) are enabled for this function. -``sanitize_realtime_unsafe`` +``sanitize_realtime_blocking`` This attribute indicates that RealtimeSanitizer should error immediately if the attributed function is called during invocation of a function attributed with ``sanitize_realtime``. 
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 08574cc356e5144..41a6447356c23b6 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -768,7 +768,7 @@ enum AttributeKindCodes { ATTR_KIND_INITIALIZES = 94, ATTR_KIND_HYBRID_PATCHABLE = 95, ATTR_KIND_SANITIZE_REALTIME = 96, - ATTR_KIND_SANITIZE_REALTIME_UNSAFE = 97, + ATTR_KIND_SANITIZE_REALTIME_BLOCKING = 97, ATTR_KIND_CORO_ELIDE_SAFE = 98, ATTR_KIND_NO_EXT = 99, ATTR_KIND_NO_DIVERGENCE_SOURCE = 100, diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index b6d36a5f7ae4fb3..49f4527bde66e7c 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -334,7 +334,7 @@ def SanitizeRealtime : EnumAttr<"sanitize_realtime", IntersectPreserve, [FnAttr] /// RealtimeSanitizer should error if a real-time unsafe function is invoked /// during a real-time sanitized function (see `sanitize_realtime`). -def SanitizeRealtimeUnsafe : EnumAttr<"sanitize_realtime_unsafe", IntersectPreserve, [FnAttr]>; +def SanitizeRealtimeBlocking : EnumAttr<"sanitize_realtime_blocking", IntersectPreserve, [FnAttr]>; /// Speculative Load Hardening is enabled. 
/// @@ -430,7 +430,7 @@ def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; -def : CompatRule<"isEqual">; +def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; def : CompatRule<"isEqual">; diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 4aea059551dedce..446c98c8cecd884 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2165,8 +2165,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::SanitizeNumericalStability; case bitc::ATTR_KIND_SANITIZE_REALTIME: return Attribute::SanitizeRealtime; - case bitc::ATTR_KIND_SANITIZE_REALTIME_UNSAFE: - return Attribute::SanitizeRealtimeUnsafe; + case bitc::ATTR_KIND_SANITIZE_REALTIME_BLOCKING: + return Attribute::SanitizeRealtimeBlocking; case bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING: return Attribute::SpeculativeLoadHardening; case bitc::ATTR_KIND_SWIFT_ERROR: diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index d9002149fba55af..ee9cc4b6e0c0ebd 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -853,8 +853,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY; case Attribute::SanitizeRealtime: return bitc::ATTR_KIND_SANITIZE_REALTIME; - case Attribute::SanitizeRealtimeUnsafe: - return bitc::ATTR_KIND_SANITIZE_REALTIME_UNSAFE; + case Attribute::SanitizeRealtimeBlocking: + return bitc::ATTR_KIND_SANITIZE_REALTIME_BLOCKING; case Attribute::SpeculativeLoadHardening: return bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING; case Attribute::SwiftError: diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 60e65392218dadf..ee807ca13787d5e 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2235,9 
+2235,9 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, } Check(!(Attrs.hasFnAttr(Attribute::SanitizeRealtime) && - Attrs.hasFnAttr(Attribute::SanitizeRealtimeUnsafe)), + Attrs.hasFnAttr(Attribute::SanitizeRealtimeBlocking)), "Attributes " - "'sanitize_realtime and sanitize_realtime_unsafe' are incompatible!", + "'sanitize_realtime and sanitize_realtime_blocking' are incompatible!", V); if (Attrs.hasFnAttr(Attribute::OptimizeForDebugging)) { diff --git a/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp index c4cb72ab2e4da90..88cb04695217d50 100644 --- a/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp @@ -69,7 +69,7 @@ static PreservedAnalyses runSanitizeRealtime(Function &Fn) { return rtsanPreservedCFGAnalyses(); } -static PreservedAnalyses runSanitizeRealtimeUnsafe(Function &Fn) { +static PreservedAnalyses runSanitizeRealtimeBlocking(Function &Fn) { IRBuilder<> Builder(&Fn.front().front()); Value *Name = Builder.CreateGlobalString(demangle(Fn.getName())); insertCallAtFunctionEntryPoint(Fn, "__rtsan_notify_blocking_call", {Name}); @@ -84,8 +84,8 @@ PreservedAnalyses RealtimeSanitizerPass::run(Function &Fn, if (Fn.hasFnAttribute(Attribute::SanitizeRealtime)) return runSanitizeRealtime(Fn); - if (Fn.hasFnAttribute(Attribute::SanitizeRealtimeUnsafe)) - return runSanitizeRealtimeUnsafe(Fn); + if (Fn.hasFnAttribute(Attribute::SanitizeRealtimeBlocking)) + return runSanitizeRealtimeBlocking(Fn); return PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 15b26a38cc28ef2..ed4ad15e5ab6952 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -953,7 +953,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::SanitizeHWAddress: case 
Attribute::SanitizeMemTag: case Attribute::SanitizeRealtime: - case Attribute::SanitizeRealtimeUnsafe: + case Attribute::SanitizeRealtimeBlocking: case Attribute::SpeculativeLoadHardening: case Attribute::StackProtect: case Attribute::StackProtectReq: diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll index 737f49aa86a7ba2..492de663884df4a 100644 --- a/llvm/test/Bitcode/attributes.ll +++ b/llvm/test/Bitcode/attributes.ll @@ -512,7 +512,7 @@ define void @f92() sanitize_realtime } ; CHECK: define void @f93() #54 -define void @f93() sanitize_realtime_unsafe { +define void @f93() sanitize_realtime_blocking { ret void; } @@ -616,7 +616,7 @@ define void @initializes(ptr initializes((-4, 0), (4, 8)) %a) { ; CHECK: attributes #51 = { uwtable(sync) } ; CHECK: attributes #52 = { nosanitize_bounds } ; CHECK: attributes #53 = { sanitize_realtime } -; CHECK: attributes #54 = { sanitize_realtime_unsafe } +; CHECK: attributes #54 = { sanitize_realtime_blocking } ; CHECK: attributes [[FNRETTHUNKEXTERN]] = { fn_ret_thunk_extern } ; CHECK: attributes [[SKIPPROFILE]] = { skipprofile } ; CHECK: attributes [[OPTDEBUG]] = { optdebug } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index 280c3a99d7535f8..a849789da536ace 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -2048,8 +2048,8 @@ declare void @f.sanitize_numerical_stability() sanitize_numerical_stability declare void @f.sanitize_realtime() sanitize_realtime ; CHECK: declare void @f.sanitize_realtime() #52 -declare void @f.sanitize_realtime_unsafe() sanitize_realtime_unsafe -; CHECK: declare void @f.sanitize_realtime_unsafe() #53 +declare void @f.sanitize_realtime_blocking() sanitize_realtime_blocking +; CHECK: declare void @f.sanitize_realtime_blocking() #53 ; CHECK: declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan)) declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan)) @@ -2183,7 +2183,7 @@ 
define float @nofpclass_callsites(float %arg, { float } %arg1) { ; CHECK: attributes #50 = { allockind("alloc,uninitialized") } ; CHECK: attributes #51 = { sanitize_numerical_stability } ; CHECK: attributes #52 = { sanitize_realtime } -; CHECK: attributes #53 = { sanitize_realtime_unsafe } +; CHECK: attributes #53 = { sanitize_realtime_blocking } ; CHECK: attributes #54 = { builtin } ;; Metadata diff --git a/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_unsafe.ll b/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_blocking.ll similarity index 87% rename from llvm/test/Instrumentation/RealtimeSanitizer/rtsan_unsafe.ll rename to llvm/test/Instrumentation/RealtimeSanitizer/rtsan_blocking.ll index 5abf5de3044816e..80eb28c3923c2de 100644 --- a/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_unsafe.ll +++ b/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_blocking.ll @@ -25,7 +25,7 @@ define noundef i32 @main() #2 { ret i32 0 } -attributes #0 = { mustprogress noinline sanitize_realtime_unsafe optnone ssp uwtable(sync) } +attributes #0 = { mustprogress noinline sanitize_realtime_blocking optnone ssp uwtable(sync) } ;. -; CHECK: attributes #[[ATTR0]] = { mustprogress noinline optnone sanitize_realtime_unsafe ssp uwtable(sync) } +; CHECK: attributes #[[ATTR0]] = { mustprogress noinline optnone sanitize_realtime_blocking ssp uwtable(sync) } ;. diff --git a/llvm/test/Verifier/rtsan-attrs.ll b/llvm/test/Verifier/rtsan-attrs.ll index fcc44d8d63c1deb..c813266b434f8ce 100644 --- a/llvm/test/Verifier/rtsan-attrs.ll +++ b/llvm/test/Verifier/rtsan-attrs.ll @@ -1,9 +1,9 @@ ; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s -; CHECK: Attributes 'sanitize_realtime and sanitize_realtime_unsafe' are incompatible! +; CHECK: Attributes 'sanitize_realtime and sanitize_realtime_blocking' are incompatible! 
; CHECK-NEXT: ptr @sanitize_unsafe define void @sanitize_unsafe() #0 { ret void } -attributes #0 = { sanitize_realtime sanitize_realtime_unsafe } +attributes #0 = { sanitize_realtime sanitize_realtime_blocking } From 0cf7aaf30067c4be2886a8c9127a27dcbfd63b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Sat, 26 Oct 2024 05:54:04 -0700 Subject: [PATCH 087/425] [MLIR][Vector] Update Transfer{Read|Write}DropUnitDimsPattern patterns (#112394) Updates `TransferWriteDropUnitDimsPattern` and `TransferReadDropUnitDimsPattern` to inherit from `MaskableOpRewritePattern` so that masked versions of xfer_read/xfer_write Ops are also supported: ```mlir %v = vector.mask %mask { vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, vector<3x2xi8> } : vector<3x2xi1> -> vector<3x2xi8> ``` --- .../Transforms/VectorTransferOpTransforms.cpp | 67 +++++++++++---- mlir/test/Dialect/Vector/invalid.mlir | 9 +++ ...ctor-transfer-drop-unit-dims-patterns.mlir | 81 ++++++++++++++++++- 3 files changed, 139 insertions(+), 18 deletions(-) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp index e05c801121ffc44..3a30382114c8dc7 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp @@ -354,11 +354,13 @@ namespace { /// inserting a memref.subview dropping those unit dims. The vector shapes are /// also reduced accordingly. 
class TransferReadDropUnitDimsPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; + : public vector::MaskableOpRewritePattern { + using MaskableOpRewritePattern::MaskableOpRewritePattern; - LogicalResult matchAndRewrite(vector::TransferReadOp transferReadOp, - PatternRewriter &rewriter) const override { + FailureOr + matchAndRewriteMaskableOp(vector::TransferReadOp transferReadOp, + vector::MaskingOpInterface maskingOp, + PatternRewriter &rewriter) const override { auto loc = transferReadOp.getLoc(); Value vector = transferReadOp.getVector(); VectorType vectorType = cast(vector.getType()); @@ -376,6 +378,10 @@ class TransferReadDropUnitDimsPattern int reducedRank = getReducedRank(sourceType.getShape()); if (reducedRank == sourceType.getRank()) return failure(); + // TODO: Extend vector.mask to support 0-d vectors. In the meantime, bail + // out. + if (reducedRank == 0 && maskingOp) + return failure(); // Check if the reduced vector shape matches the reduced source shape. // Otherwise, this case is not supported yet. 
VectorType reducedVectorType = trimNonScalableUnitDims(vectorType); @@ -406,15 +412,23 @@ class TransferReadDropUnitDimsPattern SmallVector zeros(reducedRank, c0); auto identityMap = rewriter.getMultiDimIdentityMap(reducedRank); SmallVector inBounds(reducedVectorType.getRank(), true); - auto newTransferReadOp = rewriter.create( + Operation *newTransferReadOp = rewriter.create( loc, reducedVectorType, reducedShapeSource, zeros, identityMap, transferReadOp.getPadding(), maskOp, rewriter.getBoolArrayAttr(inBounds)); + + if (maskingOp) { + auto shapeCastMask = rewriter.createOrFold( + loc, reducedVectorType.cloneWith(std::nullopt, rewriter.getI1Type()), + maskingOp.getMask()); + newTransferReadOp = mlir::vector::maskOperation( + rewriter, newTransferReadOp, shapeCastMask); + } + auto shapeCast = rewriter.createOrFold( - loc, vectorType, newTransferReadOp); - rewriter.replaceOp(transferReadOp, shapeCast); + loc, vectorType, newTransferReadOp->getResults()[0]); - return success(); + return shapeCast; } }; @@ -422,11 +436,13 @@ class TransferReadDropUnitDimsPattern /// has unit dims, by inserting a `memref.subview` dropping those unit dims. The /// vector shapes are also reduced accordingly. 
class TransferWriteDropUnitDimsPattern - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; + : public vector::MaskableOpRewritePattern { + using MaskableOpRewritePattern::MaskableOpRewritePattern; - LogicalResult matchAndRewrite(vector::TransferWriteOp transferWriteOp, - PatternRewriter &rewriter) const override { + FailureOr + matchAndRewriteMaskableOp(vector::TransferWriteOp transferWriteOp, + vector::MaskingOpInterface maskingOp, + PatternRewriter &rewriter) const override { auto loc = transferWriteOp.getLoc(); Value vector = transferWriteOp.getVector(); VectorType vectorType = cast(vector.getType()); @@ -444,6 +460,10 @@ class TransferWriteDropUnitDimsPattern int reducedRank = getReducedRank(sourceType.getShape()); if (reducedRank == sourceType.getRank()) return failure(); + // TODO: Extend vector.mask to support 0-d vectors. In the meantime, bail + // out. + if (reducedRank == 0 && maskingOp) + return failure(); // Check if the reduced vector shape matches the reduced destination shape. // Otherwise, this case is not supported yet. 
VectorType reducedVectorType = trimNonScalableUnitDims(vectorType); @@ -474,13 +494,26 @@ class TransferWriteDropUnitDimsPattern SmallVector zeros(reducedRank, c0); auto identityMap = rewriter.getMultiDimIdentityMap(reducedRank); SmallVector inBounds(reducedVectorType.getRank(), true); - auto shapeCast = rewriter.createOrFold( + auto shapeCastSrc = rewriter.createOrFold( loc, reducedVectorType, vector); - rewriter.replaceOpWithNewOp( - transferWriteOp, Type(), shapeCast, reducedShapeSource, zeros, - identityMap, maskOp, rewriter.getBoolArrayAttr(inBounds)); + Operation *newXferWrite = rewriter.create( + loc, Type(), shapeCastSrc, reducedShapeSource, zeros, identityMap, + maskOp, rewriter.getBoolArrayAttr(inBounds)); + + if (maskingOp) { + auto shapeCastMask = rewriter.createOrFold( + loc, reducedVectorType.cloneWith(std::nullopt, rewriter.getI1Type()), + maskingOp.getMask()); + newXferWrite = + mlir::vector::maskOperation(rewriter, newXferWrite, shapeCastMask); + } - return success(); + if (transferWriteOp.hasPureTensorSemantics()) + return newXferWrite->getResults()[0]; + + // With Memref semantics, there's no return value. Use empty value to signal + // success. 
+ return Value(); } }; diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 5b0fb537b35655b..56039d04549aa53 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -1717,6 +1717,15 @@ func.func @vector_mask_shape_mismatch(%a: vector<8xi32>, %m0: vector<16xi1>) -> // ----- +func.func @vector_mask_passthru_type_mismatch(%t0: tensor, %m0: vector) -> vector { + %ft0 = arith.constant 0.0 : f32 + // expected-error@+1 {{'vector.mask' op operand #0 must be vector of 1-bit signless integer values, but got 'vector'}} + %0 = vector.mask %m0 { vector.transfer_read %t0[], %ft0 : tensor, vector } : vector -> vector + return %0 : vector +} + +// ----- + // expected-note@+1 {{prior use here}} func.func @vector_mask_passthru_type_mismatch(%t0: tensor, %idx: index, %m0: vector<16xi1>, %pt0: vector<16xi32>) -> vector<16xf32> { %ft0 = arith.constant 0.0 : f32 diff --git a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir index e9d12b044e2c7e8..8234351302f6b56 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir @@ -1,5 +1,9 @@ // RUN: mlir-opt %s --transform-interpreter | FileCheck %s +//----------------------------------------------------------------------------- +// [Patterns: TransferWriteDropUnitDimsPattern, TransferReadeDropUnitDimsPattern] +//----------------------------------------------------------------------------- + func.func @transfer_read_rank_reducing( %arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>) -> vector<3x2xi8> { %c0 = arith.constant 0 : index @@ -14,7 +18,29 @@ func.func @transfer_read_rank_reducing( // CHECK-SAME: memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}> // CHECK: vector.transfer_read %[[SUBVIEW]] -func.func @transfer_write_rank_reducing(%arg : 
memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, %vec : vector<3x2xi8>) { +func.func @transfer_read_rank_reducing_masked( + %arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, + %mask: vector<3x2xi1>) -> vector<3x2xi8> { + %c0 = arith.constant 0 : index + %cst = arith.constant 0 : i8 + %v = vector.mask %mask { + vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst : + memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, vector<3x2xi8> + } : vector<3x2xi1> -> vector<3x2xi8> + return %v : vector<3x2xi8> +} +// CHECK-LABEL: func @transfer_read_rank_reducing_masked +// CHECK-SAME: %[[ARG:.+]]: memref<1x1x3x2xi8 +// CHECK-SAME: %[[MASK:.+]]: vector<3x2xi1> +// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0] [1, 1, 3, 2] [1, 1, 1, 1] +// CHECK-SAME: memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}> +// CHECK: vector.mask %[[MASK]] +// CHECK-SAME: vector.transfer_read %[[SUBVIEW]] + +func.func @transfer_write_rank_reducing( + %arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, + %vec : vector<3x2xi8>) { + %c0 = arith.constant 0 : index vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] : vector<3x2xi8>, memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>> @@ -26,6 +52,26 @@ func.func @transfer_write_rank_reducing(%arg : memref<1x1x3x2xi8, strided<[6, 6, // CHECK-SAME: memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}> // CHECK: vector.transfer_write %{{.*}}, %[[SUBVIEW]] +func.func @transfer_write_rank_reducing_masked( + %arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, + %vec : vector<3x2xi8>, + %mask: vector<3x2xi1>) { + %c0 = arith.constant 0 : index + vector.mask %mask { + vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] : + vector<3x2xi8>, memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>> + } : vector<3x2xi1> + return +} +// CHECK-LABEL: func @transfer_write_rank_reducing_masked +// CHECK-SAME: %[[ARG:.+]]: memref<1x1x3x2xi8 +// CHECK-SAME: %[[VEC:.+]]: vector<3x2xi8> +// CHECK-SAME: %[[MASK:.+]]: 
vector<3x2xi1> +// CHECK: %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0] [1, 1, 3, 2] [1, 1, 1, 1] +// CHECK-SAME: memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}> +// CHECK: vector.mask %[[MASK]] +// CHECK-SAME: vector.transfer_write %{{.*}}, %[[SUBVIEW]] + func.func @transfer_read_and_vector_rank_reducing( %arg : memref<1x1x3x2x1xf32>) -> vector<3x2x1xf32> { %c0 = arith.constant 0 : index @@ -68,6 +114,22 @@ func.func @transfer_read_and_vector_rank_reducing_to_0d( // CHECK: %[[READ:.+]] = vector.transfer_read %[[SUBVIEW]]{{.*}} : memref, vector // CHECK: vector.shape_cast %[[READ]] : vector to vector<1x1x1xf32> +func.func @transfer_read_and_vector_rank_reducing_to_0d_masked( + %arg : memref<1x1x1x1x1xf32>, + %mask: vector<1x1x1xi1>) -> vector<1x1x1xf32> { + + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + %v = vector.mask %mask { + vector.transfer_read %arg[%c0, %c0, %c0, %c0, %c0], %cst + : memref<1x1x1x1x1xf32>, vector<1x1x1xf32> + } : vector<1x1x1xi1> -> vector<1x1x1xf32> + return %v : vector<1x1x1xf32> +} +// CHECK-LABEL: func @transfer_read_and_vector_rank_reducing_to_0d_masked +// CHECK-NOT: vector.shape_cast +// CHECK-NOT: memref.subview + func.func @transfer_write_and_vector_rank_reducing_to_0d( %arg : memref<1x1x1x1x1xf32>, %vec : vector<1x1x1xf32>) { @@ -82,6 +144,23 @@ func.func @transfer_write_and_vector_rank_reducing_to_0d( // CHECK: %[[SHCAST:.+]] = vector.shape_cast %[[VECTOR]] : vector<1x1x1xf32> to vector // CHECK: vector.transfer_write %[[SHCAST]], %[[SUBVIEW]]{{.*}} : vector, memref +func.func @transfer_write_and_vector_rank_reducing_to_0d_masked( + %arg : memref<1x1x1x1x1xf32>, + %vec : vector<1x1x1xf32>, + %mask: vector<1x1x1xi1>) { + + %c0 = arith.constant 0 : index + %cst = arith.constant 0.0 : f32 + vector.mask %mask { + vector.transfer_write %vec, %arg[%c0, %c0, %c0, %c0, %c0] : + vector<1x1x1xf32>, memref<1x1x1x1x1xf32> + } : vector<1x1x1xi1> + return +} +// CHECK-LABEL: func 
@transfer_write_and_vector_rank_reducing_to_0d_masked +// CHECK-NOT: vector.shape_cast +// CHECK-NOT: memref.subview + func.func @transfer_read_dynamic_rank_reducing( %arg : memref>) -> vector<[16]x1xi8> { %c0 = arith.constant 0 : index From e146c1867e8decfd423034f63a3a863733e03f04 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Sat, 26 Oct 2024 09:03:04 -0400 Subject: [PATCH 088/425] [libc++] Split Apple and Freebsd locale support into their own headers (#113737) For now these headers don't provide much benefit, however as we refactor the locale base API they will provide a location to specify the localization interface on these platforms. --- libcxx/include/CMakeLists.txt | 2 ++ libcxx/include/__locale_dir/locale_base_api.h | 6 ++++-- .../include/__locale_dir/locale_base_api/apple.h | 15 +++++++++++++++ .../__locale_dir/locale_base_api/freebsd.h | 15 +++++++++++++++ libcxx/include/module.modulemap | 2 ++ 5 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 libcxx/include/__locale_dir/locale_base_api/apple.h create mode 100644 libcxx/include/__locale_dir/locale_base_api/freebsd.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 506ed721d0843ec..bb152af82cad5c3 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -492,8 +492,10 @@ set(files __locale __locale_dir/locale_base_api.h __locale_dir/locale_base_api/android.h + __locale_dir/locale_base_api/apple.h __locale_dir/locale_base_api/bsd_locale_defaults.h __locale_dir/locale_base_api/bsd_locale_fallbacks.h + __locale_dir/locale_base_api/freebsd.h __locale_dir/locale_base_api/fuchsia.h __locale_dir/locale_base_api/ibm.h __locale_dir/locale_base_api/musl.h diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index eab7fa8bf62faec..b6c80255b4d1991 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -21,8 +21,10 @@ # include 
<__locale_dir/locale_base_api/fuchsia.h> #elif defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) # include <__locale_dir/locale_base_api/musl.h> -#elif defined(__APPLE__) || defined(__FreeBSD__) -# include +#elif defined(__APPLE__) +# include <__locale_dir/locale_base_api/apple.h> +#elif defined(__FreeBSD__) +# include <__locale_dir/locale_base_api/freebsd.h> #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__locale_dir/locale_base_api/apple.h b/libcxx/include/__locale_dir/locale_base_api/apple.h new file mode 100644 index 000000000000000..ec5986c3a19f105 --- /dev/null +++ b/libcxx/include/__locale_dir/locale_base_api/apple.h @@ -0,0 +1,15 @@ +// -*- C++ -*- +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H + +#include + +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H diff --git a/libcxx/include/__locale_dir/locale_base_api/freebsd.h b/libcxx/include/__locale_dir/locale_base_api/freebsd.h new file mode 100644 index 000000000000000..45ecf1977471b85 --- /dev/null +++ b/libcxx/include/__locale_dir/locale_base_api/freebsd.h @@ -0,0 +1,15 @@ +// -*- C++ -*- +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FREEBSD_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FREEBSD_H + +#include + +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FREEBSD_H diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index f92e8bf5fc9aba5..05d08cfbd7cd294 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1446,8 +1446,10 @@ module std [system] { header "__locale_dir/locale_guard.h" module locale_base_api { textual header "__locale_dir/locale_base_api/android.h" + textual header "__locale_dir/locale_base_api/apple.h" textual header "__locale_dir/locale_base_api/bsd_locale_defaults.h" textual header "__locale_dir/locale_base_api/bsd_locale_fallbacks.h" + textual header "__locale_dir/locale_base_api/freebsd.h" textual header "__locale_dir/locale_base_api/fuchsia.h" textual header "__locale_dir/locale_base_api/ibm.h" textual header "__locale_dir/locale_base_api/musl.h" From 0df70c28d25aedbedbb4c89f2c03a0e10ec0e8f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Sat, 26 Oct 2024 16:53:01 +0200 Subject: [PATCH 089/425] [llvm][SystemZ] Remove some leftover code from #106014. NFC. 
(#113761) Pointed out by @redstar here: https://github.com/llvm/llvm-project/pull/106014/files#r1816845388 --- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp index 8fbd05eab5f6ee2..f2fa7e7c9f9fee6 100644 --- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -1450,11 +1450,6 @@ void SystemZXPLINKFrameLowering::inlineStackProbe( } bool SystemZXPLINKFrameLowering::hasFPImpl(const MachineFunction &MF) const { - // Naked functions have no stack frame pushed, so we don't have a frame - // pointer. - if (MF.getFunction().hasFnAttribute(Attribute::Naked)) - return false; - return (MF.getFrameInfo().hasVarSizedObjects()); } From 0f4b3c409fbd756d826c89d5539d9ea22bcc56aa Mon Sep 17 00:00:00 2001 From: lntue Date: Sat, 26 Oct 2024 10:55:20 -0400 Subject: [PATCH 090/425] [libc][math] Add tests and fix some issues with FTZ/DAZ modes. 
(#113744) --- libc/src/math/generic/atan2.cpp | 4 +-- libc/src/math/generic/cbrt.cpp | 5 +-- libc/src/math/generic/cbrtf.cpp | 5 +-- libc/src/math/generic/log.cpp | 2 +- libc/src/math/generic/log10.cpp | 2 +- libc/src/math/generic/log10f.cpp | 2 +- libc/src/math/generic/log1p.cpp | 6 ++-- libc/src/math/generic/log2.cpp | 2 +- libc/src/math/generic/log2f.cpp | 2 +- libc/src/math/generic/logf.cpp | 2 +- libc/src/math/generic/pow.cpp | 16 +++++----- libc/src/math/generic/powf.cpp | 25 ++++++++------- libc/src/math/generic/sin.cpp | 2 +- libc/src/math/generic/tan.cpp | 2 +- libc/test/src/math/smoke/HypotTest.h | 4 +-- libc/test/src/math/smoke/acosf_test.cpp | 24 +++++++++++++++ libc/test/src/math/smoke/acoshf_test.cpp | 24 +++++++++++++++ libc/test/src/math/smoke/asinf_test.cpp | 24 +++++++++++++++ libc/test/src/math/smoke/asinhf_test.cpp | 24 +++++++++++++++ libc/test/src/math/smoke/atan2_test.cpp | 37 +++++++++++++++++++++++ libc/test/src/math/smoke/atanf_test.cpp | 24 +++++++++++++++ libc/test/src/math/smoke/atanhf_test.cpp | 24 +++++++++++++++ libc/test/src/math/smoke/cbrt_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/cbrtf_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/cos_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/cosf_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/coshf_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/cospif_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/erff_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/exp10_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/exp10f_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/exp2_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/exp2f_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/exp2m1f_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/exp_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/expf_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/expm1_test.cpp | 27 +++++++++++++++++ 
libc/test/src/math/smoke/expm1f_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/hypotf_test.cpp | 34 +++++++++++++++++++++ libc/test/src/math/smoke/log10_test.cpp | 26 ++++++++++++++++ libc/test/src/math/smoke/log10f_test.cpp | 26 ++++++++++++++++ libc/test/src/math/smoke/log1p_test.cpp | 24 +++++++++++++++ libc/test/src/math/smoke/log1pf_test.cpp | 24 +++++++++++++++ libc/test/src/math/smoke/log2_test.cpp | 26 ++++++++++++++++ libc/test/src/math/smoke/log2f_test.cpp | 25 +++++++++++++++ libc/test/src/math/smoke/log_test.cpp | 26 ++++++++++++++++ libc/test/src/math/smoke/logf_test.cpp | 25 +++++++++++++++ libc/test/src/math/smoke/pow_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/powf_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/sin_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/sinf_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/sinhf_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/sinpif_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/tan_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/tanf_test.cpp | 27 +++++++++++++++++ libc/test/src/math/smoke/tanhf_test.cpp | 27 +++++++++++++++++ 56 files changed, 1135 insertions(+), 38 deletions(-) diff --git a/libc/src/math/generic/atan2.cpp b/libc/src/math/generic/atan2.cpp index c39deebca4d40ec..1b16e15d29d0b31 100644 --- a/libc/src/math/generic/atan2.cpp +++ b/libc/src/math/generic/atan2.cpp @@ -230,8 +230,8 @@ LLVM_LIBC_FUNCTION(double, atan2, (double y, double x)) { if (LIBC_UNLIKELY(max_exp > 0x7ffU - 128U || min_exp < 128U)) { if (x_bits.is_nan() || y_bits.is_nan()) return FPBits::quiet_nan().get_val(); - unsigned x_except = x_abs == 0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1); - unsigned y_except = y_abs == 0 ? 0 : (FPBits(y_abs).is_inf() ? 2 : 1); + unsigned x_except = x == 0.0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1); + unsigned y_except = y == 0.0 ? 0 : (FPBits(y_abs).is_inf() ? 
2 : 1); // Exceptional cases: // EXCEPT[y_except][x_except][x_is_neg] diff --git a/libc/src/math/generic/cbrt.cpp b/libc/src/math/generic/cbrt.cpp index 4fa24c54fdeecf0..ee7d69b2c211fac 100644 --- a/libc/src/math/generic/cbrt.cpp +++ b/libc/src/math/generic/cbrt.cpp @@ -151,9 +151,10 @@ LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() || x_abs >= FPBits::inf().uintval())) { - if (x_abs == 0 || x_abs >= FPBits::inf().uintval()) + if (x == 0.0 || x_abs >= FPBits::inf().uintval()) // x is 0, Inf, or NaN. - return x; + // Make sure it works for FTZ/DAZ modes. + return static_cast(x + x); // x is non-zero denormal number. // Normalize x. diff --git a/libc/src/math/generic/cbrtf.cpp b/libc/src/math/generic/cbrtf.cpp index 313961bf356b830..0abbf6e879421c0 100644 --- a/libc/src/math/generic/cbrtf.cpp +++ b/libc/src/math/generic/cbrtf.cpp @@ -93,9 +93,10 @@ LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) { uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff; uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN; - if (LIBC_UNLIKELY(x_abs == 0 || x_abs >= 0x7f80'0000)) { + if (LIBC_UNLIKELY(x == 0.0f || x_abs >= 0x7f80'0000)) { // x is 0, Inf, or NaN. - return x; + // Make sure it works for FTZ/DAZ modes. + return x + x; } double xd = static_cast(x); diff --git a/libc/src/math/generic/log.cpp b/libc/src/math/generic/log.cpp index 57c70e31730bf6b..4302c64c8abac86 100644 --- a/libc/src/math/generic/log.cpp +++ b/libc/src/math/generic/log.cpp @@ -749,7 +749,7 @@ LLVM_LIBC_FUNCTION(double, log, (double x)) { if (LIBC_UNLIKELY(xbits.uintval() < FPBits_t::min_normal().uintval() || xbits.uintval() > FPBits_t::max_normal().uintval())) { - if (xbits.is_zero()) { + if (x == 0.0) { // return -Inf and raise FE_DIVBYZERO. 
fputil::set_errno_if_required(ERANGE); fputil::raise_except_if_required(FE_DIVBYZERO); diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp index b99b22b024fe3cc..7df57ef85b81b96 100644 --- a/libc/src/math/generic/log10.cpp +++ b/libc/src/math/generic/log10.cpp @@ -751,7 +751,7 @@ LLVM_LIBC_FUNCTION(double, log10, (double x)) { if (LIBC_UNLIKELY(xbits.uintval() < FPBits_t::min_normal().uintval() || xbits.uintval() > FPBits_t::max_normal().uintval())) { - if (xbits.is_zero()) { + if (x == 0.0) { // return -Inf and raise FE_DIVBYZERO. fputil::set_errno_if_required(ERANGE); fputil::raise_except_if_required(FE_DIVBYZERO); diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp index f7dd85cc08bf036..c635fa4ef9b63fa 100644 --- a/libc/src/math/generic/log10f.cpp +++ b/libc/src/math/generic/log10f.cpp @@ -164,7 +164,7 @@ LLVM_LIBC_FUNCTION(float, log10f, (float x)) { if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval() || x_u > FPBits::max_normal().uintval())) { - if (xbits.is_zero()) { + if (x == 0.0f) { // Return -inf and raise FE_DIVBYZERO fputil::set_errno_if_required(ERANGE); fputil::raise_except_if_required(FE_DIVBYZERO); diff --git a/libc/src/math/generic/log1p.cpp b/libc/src/math/generic/log1p.cpp index f301a5aba3a57c4..43eb8a924aef476 100644 --- a/libc/src/math/generic/log1p.cpp +++ b/libc/src/math/generic/log1p.cpp @@ -927,8 +927,8 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) { // log(1 + x) = nextafter(x, -inf) for FE_DOWNWARD, or // FE_TOWARDZERO and x > 0, // = x otherwise. - if (LIBC_UNLIKELY(xbits.is_zero())) - return x; + if (x == 0.0) + return x + x; // Handle FTZ/DAZ correctly. volatile float tp = 1.0f; volatile float tn = -1.0f; @@ -943,7 +943,7 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) { return FPBits_t(x_u + 1).get_val(); } - return x; + return (x + x == 0.0) ? 
x + x : x; } x_dd = fputil::exact_add(1.0, x); } diff --git a/libc/src/math/generic/log2.cpp b/libc/src/math/generic/log2.cpp index 7d868e2f6f61985..37ea0c8f1343155 100644 --- a/libc/src/math/generic/log2.cpp +++ b/libc/src/math/generic/log2.cpp @@ -871,7 +871,7 @@ LLVM_LIBC_FUNCTION(double, log2, (double x)) { if (LIBC_UNLIKELY(xbits.uintval() < FPBits_t::min_normal().uintval() || xbits.uintval() > FPBits_t::max_normal().uintval())) { - if (xbits.is_zero()) { + if (x == 0.0) { // return -Inf and raise FE_DIVBYZERO. fputil::set_errno_if_required(ERANGE); fputil::raise_except_if_required(FE_DIVBYZERO); diff --git a/libc/src/math/generic/log2f.cpp b/libc/src/math/generic/log2f.cpp index 9cad02d796b189c..111f3f130bcab18 100644 --- a/libc/src/math/generic/log2f.cpp +++ b/libc/src/math/generic/log2f.cpp @@ -72,7 +72,7 @@ LLVM_LIBC_FUNCTION(float, log2f, (float x)) { // Exceptional inputs. if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval() || x_u > FPBits::max_normal().uintval())) { - if (xbits.is_zero()) { + if (x == 0.0f) { fputil::set_errno_if_required(ERANGE); fputil::raise_except_if_required(FE_DIVBYZERO); return FPBits::inf(Sign::NEG).get_val(); diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp index f8ecf320568ac71..30c00edafe21d86 100644 --- a/libc/src/math/generic/logf.cpp +++ b/libc/src/math/generic/logf.cpp @@ -82,7 +82,7 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) { } // Subnormal inputs. 
if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval())) { - if (x_u == 0) { + if (x == 0.0f) { // Return -inf and raise FE_DIVBYZERO fputil::set_errno_if_required(ERANGE); fputil::raise_except_if_required(FE_DIVBYZERO); diff --git a/libc/src/math/generic/pow.cpp b/libc/src/math/generic/pow.cpp index 181d3d40b3c9adf..213dbd959039c30 100644 --- a/libc/src/math/generic/pow.cpp +++ b/libc/src/math/generic/pow.cpp @@ -228,16 +228,18 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) { x_u >= FPBits::inf().uintval() || x_u < FPBits::min_normal().uintval())) { // Exceptional exponents. - switch (y_a) { - case 0: // y = +-0.0 + if (y == 0.0) return 1.0; + + switch (y_a) { case 0x3fe0'0000'0000'0000: { // y = +-0.5 // TODO: speed up x^(-1/2) with rsqrt(x) when available. - if (LIBC_UNLIKELY(!y_sign && (x_u == FPBits::zero(Sign::NEG).uintval() || - x_u == FPBits::inf(Sign::NEG).uintval()))) { + if (LIBC_UNLIKELY( + (x == 0.0 || x_u == FPBits::inf(Sign::NEG).uintval()))) { // pow(-0, 1/2) = +0 // pow(-inf, 1/2) = +inf - return FPBits(x_abs).get_val(); + // Make sure it works correctly for FTZ/DAZ. + return y_sign ? 1.0 / (x * x) : (x * x); } return y_sign ? (1.0 / fputil::sqrt(x)) : fputil::sqrt(x); } @@ -269,7 +271,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) { return 1.0; } - if (x_a == 0 && y_sign) { + if (x == 0.0 && y_sign) { // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO fputil::set_errno_if_required(EDOM); fputil::raise_except_if_required(FE_DIVBYZERO); @@ -298,7 +300,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) { // TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y). 
- if (x_a == 0) { + if (x == 0.0) { bool out_is_neg = x_sign && is_odd_integer(y); if (y_sign) { // pow(0, negative number) = inf diff --git a/libc/src/math/generic/powf.cpp b/libc/src/math/generic/powf.cpp index 83477c6ef2aceba..c84ce0da34b10a3 100644 --- a/libc/src/math/generic/powf.cpp +++ b/libc/src/math/generic/powf.cpp @@ -529,10 +529,10 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) { // Hence x^y will either overflow or underflow if x is not zero. if (LIBC_UNLIKELY((y_abs & 0x0007'ffff) == 0) || (y_abs > 0x4f170000)) { // Exceptional exponents. - switch (y_abs) { - case 0x0000'0000: { // y = +-0.0f + if (y == 0.0f) return 1.0f; - } + + switch (y_abs) { case 0x7f80'0000: { // y = +-Inf if (x_abs > 0x7f80'0000) { // pow(NaN, +-Inf) = NaN @@ -542,7 +542,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) { // pow(+-1, +-Inf) = 1.0f return 1.0f; } - if (x_abs == 0 && y_u == 0xff80'0000) { + if (x == 0.0f && y_u == 0xff80'0000) { // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO fputil::set_errno_if_required(EDOM); fputil::raise_except_if_required(FE_DIVBYZERO); @@ -561,12 +561,15 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) { switch (y_u) { case 0x3f00'0000: // y = 0.5f // pow(x, 1/2) = sqrt(x) - if (LIBC_UNLIKELY(x_u == 0x8000'0000 || x_u == 0xff80'0000)) { + if (LIBC_UNLIKELY(x == 0.0f || x_u == 0xff80'0000)) { // pow(-0, 1/2) = +0 // pow(-inf, 1/2) = +inf - return FloatBits(x_abs).get_val(); + // Make sure it is correct for FTZ/DAZ. + return x * x; } - return fputil::sqrt(x); + float r; + r = fputil::sqrt(x); + return (FloatBits(r).uintval() != 0x8000'0000) ? 
r : 0.0f; case 0x3f80'0000: // y = 1.0f return x; case 0x4000'0000: // y = 2.0f @@ -634,8 +637,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) { const bool x_is_neg = x_u >= FloatBits::SIGN_MASK; - switch (x_abs) { - case 0x0000'0000: { // x = +-0.0f + if (x == 0.0f) { const bool out_is_neg = x_is_neg && is_odd_integer(FloatBits(y_u).get_val()); if (y_u > 0x8000'0000U) { @@ -647,7 +649,9 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) { // pow(0, positive number) = 0 return out_is_neg ? -0.0f : 0.0f; } - case 0x7f80'0000: { // x = +-Inf + + if (x_abs == 0x7f80'0000) { + // x = +-Inf const bool out_is_neg = x_is_neg && is_odd_integer(FloatBits(y_u).get_val()); if (y_u >= FloatBits::SIGN_MASK) { @@ -655,7 +659,6 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) { } return FloatBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val(); } - } if (x_abs > 0x7f80'0000) { // x is NaN. diff --git a/libc/src/math/generic/sin.cpp b/libc/src/math/generic/sin.cpp index 2e1d3ffd5f37d80..b32486dff487cad 100644 --- a/libc/src/math/generic/sin.cpp +++ b/libc/src/math/generic/sin.cpp @@ -50,7 +50,7 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) { if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 26)) { // Signed zeros. if (LIBC_UNLIKELY(x == 0.0)) - return x; + return x + x; // Make sure it works with FTZ/DAZ. #ifdef LIBC_TARGET_CPU_HAS_FMA return fputil::multiply_add(x, -0x1.0p-54, x); diff --git a/libc/src/math/generic/tan.cpp b/libc/src/math/generic/tan.cpp index f9be25ed866e1d0..19d31a8441efb68 100644 --- a/libc/src/math/generic/tan.cpp +++ b/libc/src/math/generic/tan.cpp @@ -138,7 +138,7 @@ LLVM_LIBC_FUNCTION(double, tan, (double x)) { if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) { // Signed zeros. if (LIBC_UNLIKELY(x == 0.0)) - return x; + return x + x; // Make sure it works with FTZ/DAZ. 
#ifdef LIBC_TARGET_CPU_HAS_FMA return fputil::multiply_add(x, 0x1.0p-54, x); diff --git a/libc/test/src/math/smoke/HypotTest.h b/libc/test/src/math/smoke/HypotTest.h index d7c62dcbeb0edb1..30d57a4fe2a2672 100644 --- a/libc/test/src/math/smoke/HypotTest.h +++ b/libc/test/src/math/smoke/HypotTest.h @@ -14,13 +14,11 @@ #include "test/UnitTest/Test.h" template -class HypotTestTemplate : public LIBC_NAMESPACE::testing::Test { -private: +struct HypotTestTemplate : public LIBC_NAMESPACE::testing::Test { using Func = T (*)(T, T); DECLARE_SPECIAL_CONSTANTS(T) -public: void test_special_numbers(Func func) { constexpr int N = 4; // Pythagorean triples. diff --git a/libc/test/src/math/smoke/acosf_test.cpp b/libc/test/src/math/smoke/acosf_test.cpp index 039d8c2013830d1..e5d56c70f27221d 100644 --- a/libc/test/src/math/smoke/acosf_test.cpp +++ b/libc/test/src/math/smoke/acosf_test.cpp @@ -38,3 +38,27 @@ TEST_F(LlvmLibcAcosfTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acosf(-2.0f)); EXPECT_MATH_ERRNO(EDOM); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcAcosfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0x1.921fb6p0f, LIBC_NAMESPACE::acosf(min_denormal)); +} + +TEST_F(LlvmLibcAcosfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0x1.921fb6p0f, LIBC_NAMESPACE::acosf(min_denormal)); +} + +TEST_F(LlvmLibcAcosfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0x1.921fb6p0f, LIBC_NAMESPACE::acosf(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/acoshf_test.cpp b/libc/test/src/math/smoke/acoshf_test.cpp index 91d433df80558d5..c4e88259919c3cc 100644 --- a/libc/test/src/math/smoke/acoshf_test.cpp +++ b/libc/test/src/math/smoke/acoshf_test.cpp @@ -35,3 +35,27 @@ TEST_F(LlvmLibcAcoshfTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acoshf(neg_inf)); EXPECT_MATH_ERRNO(EDOM); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace 
LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcAcoshfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::acoshf(min_denormal)); +} + +TEST_F(LlvmLibcAcoshfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::acoshf(min_denormal)); +} + +TEST_F(LlvmLibcAcoshfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::acoshf(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/asinf_test.cpp b/libc/test/src/math/smoke/asinf_test.cpp index 450255ccd3020dc..ce1576e2b57dfca 100644 --- a/libc/test/src/math/smoke/asinf_test.cpp +++ b/libc/test/src/math/smoke/asinf_test.cpp @@ -41,3 +41,27 @@ TEST_F(LlvmLibcAsinfTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::asinf(-2.0f)); EXPECT_MATH_ERRNO(EDOM); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcAsinfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinf(min_denormal)); +} + +TEST_F(LlvmLibcAsinfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinf(min_denormal)); +} + +TEST_F(LlvmLibcAsinfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinf(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/asinhf_test.cpp b/libc/test/src/math/smoke/asinhf_test.cpp index a8e54f379a1fd01..5b83ce6466113f9 100644 --- a/libc/test/src/math/smoke/asinhf_test.cpp +++ b/libc/test/src/math/smoke/asinhf_test.cpp @@ -35,3 +35,27 @@ TEST_F(LlvmLibcAsinhfTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, LIBC_NAMESPACE::asinhf(neg_inf)); EXPECT_MATH_ERRNO(0); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcAsinhfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinhf(min_denormal)); +} + +TEST_F(LlvmLibcAsinhfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, 
LIBC_NAMESPACE::asinhf(min_denormal)); +} + +TEST_F(LlvmLibcAsinhfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinhf(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/atan2_test.cpp b/libc/test/src/math/smoke/atan2_test.cpp index 61dd6cab1049fe2..1606c3f378cb88c 100644 --- a/libc/test/src/math/smoke/atan2_test.cpp +++ b/libc/test/src/math/smoke/atan2_test.cpp @@ -20,3 +20,40 @@ TEST_F(LlvmLibcAtan2Test, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(0.0, LIBC_NAMESPACE::atan2(1.0, inf)); EXPECT_FP_EQ_ALL_ROUNDING(-0.0, LIBC_NAMESPACE::atan2(-1.0, inf)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcAtan2Test, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0x1.921fb54442d18p-1, + LIBC_NAMESPACE::atan2(min_denormal, min_denormal)); + EXPECT_FP_EQ(0x1.0000000000001p-52, + LIBC_NAMESPACE::atan2(min_denormal, max_denormal)); + EXPECT_FP_EQ(0x1.921fb54442d17p0, + LIBC_NAMESPACE::atan2(max_denormal, min_denormal)); + EXPECT_FP_EQ(0x1.921fb54442d18p-1, + LIBC_NAMESPACE::atan2(max_denormal, max_denormal)); +} + +TEST_F(LlvmLibcAtan2Test, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(min_denormal, min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(min_denormal, max_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(max_denormal, min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(max_denormal, max_denormal)); +} + +TEST_F(LlvmLibcAtan2Test, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(min_denormal, min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(min_denormal, max_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(max_denormal, min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(max_denormal, max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/atanf_test.cpp b/libc/test/src/math/smoke/atanf_test.cpp index 
0fe11d79533810c..346b8e8abd19919 100644 --- a/libc/test/src/math/smoke/atanf_test.cpp +++ b/libc/test/src/math/smoke/atanf_test.cpp @@ -42,3 +42,27 @@ TEST_F(LlvmLibcAtanfTest, SpecialNumbers) { // EXPECT_FP_EXCEPTION(0); EXPECT_MATH_ERRNO(0); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcAtanfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanf(min_denormal)); +} + +TEST_F(LlvmLibcAtanfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanf(min_denormal)); +} + +TEST_F(LlvmLibcAtanfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanf(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/atanhf_test.cpp b/libc/test/src/math/smoke/atanhf_test.cpp index e22926bd2f03762..8300b47ea9a3151 100644 --- a/libc/test/src/math/smoke/atanhf_test.cpp +++ b/libc/test/src/math/smoke/atanhf_test.cpp @@ -76,3 +76,27 @@ TEST_F(LlvmLibcAtanhfTest, SpecialNumbers) { EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::atanhf(neg_inf), FE_INVALID); EXPECT_MATH_ERRNO(EDOM); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcAtanhfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanhf(min_denormal)); +} + +TEST_F(LlvmLibcAtanhfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanhf(min_denormal)); +} + +TEST_F(LlvmLibcAtanhfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanhf(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/cbrt_test.cpp b/libc/test/src/math/smoke/cbrt_test.cpp index d57cdb20de27460..092e6dd1aeed32b 100644 --- a/libc/test/src/math/smoke/cbrt_test.cpp +++ b/libc/test/src/math/smoke/cbrt_test.cpp @@ -35,3 +35,30 @@ TEST_F(LlvmLibcCbrtTest, SpecialNumbers) { EXPECT_FP_EQ(-0x1.0p-340, LIBC_NAMESPACE::cbrt(-0x1.fffffffffffffp-1021)); 
EXPECT_FP_EQ(2.0, LIBC_NAMESPACE::cbrt(0x1.fffffffffffffp2)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcCbrtTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0x1.0p-358, LIBC_NAMESPACE::cbrt(min_denormal)); + EXPECT_FP_EQ(0x1.428a2f98d728ap-341, LIBC_NAMESPACE::cbrt(max_denormal)); +} + +TEST_F(LlvmLibcCbrtTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::cbrt(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::cbrt(max_denormal)); +} + +TEST_F(LlvmLibcCbrtTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::cbrt(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::cbrt(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/cbrtf_test.cpp b/libc/test/src/math/smoke/cbrtf_test.cpp index a68e57744bd0e78..202a5ce0733585d 100644 --- a/libc/test/src/math/smoke/cbrtf_test.cpp +++ b/libc/test/src/math/smoke/cbrtf_test.cpp @@ -31,3 +31,30 @@ TEST_F(LlvmLibcCbrtfTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(0x1.0p42f, LIBC_NAMESPACE::cbrtf(0x1.0p126f)); EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p42f, LIBC_NAMESPACE::cbrtf(-0x1.0p126f)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcCbrtfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0x1.428a3p-50f, LIBC_NAMESPACE::cbrtf(min_denormal)); + EXPECT_FP_EQ(0x1.fffffep-43f, LIBC_NAMESPACE::cbrtf(max_denormal)); +} + +TEST_F(LlvmLibcCbrtfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::cbrtf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::cbrtf(max_denormal)); +} + +TEST_F(LlvmLibcCbrtfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::cbrtf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::cbrtf(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/cos_test.cpp b/libc/test/src/math/smoke/cos_test.cpp index 81c8612dba26e5c..88d8ead1af99221 100644 --- 
a/libc/test/src/math/smoke/cos_test.cpp +++ b/libc/test/src/math/smoke/cos_test.cpp @@ -24,3 +24,30 @@ TEST_F(LlvmLibcCosTest, SpecialNumbers) { EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_normal)); EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_denormal)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcCosTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(max_denormal)); +} + +TEST_F(LlvmLibcCosTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(max_denormal)); +} + +TEST_F(LlvmLibcCosTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/cosf_test.cpp b/libc/test/src/math/smoke/cosf_test.cpp index 62f7ede9cf17810..2e261f9fac3c0cf 100644 --- a/libc/test/src/math/smoke/cosf_test.cpp +++ b/libc/test/src/math/smoke/cosf_test.cpp @@ -35,3 +35,30 @@ TEST_F(LlvmLibcCosfTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::cosf(neg_inf)); EXPECT_MATH_ERRNO(EDOM); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcCosfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(max_denormal)); +} + +TEST_F(LlvmLibcCosfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(max_denormal)); +} + +TEST_F(LlvmLibcCosfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/coshf_test.cpp 
b/libc/test/src/math/smoke/coshf_test.cpp index ddaa19f4c392f7d..fd1556b10116d9a 100644 --- a/libc/test/src/math/smoke/coshf_test.cpp +++ b/libc/test/src/math/smoke/coshf_test.cpp @@ -51,3 +51,30 @@ TEST_F(LlvmLibcCoshfTest, Overflow) { inf, LIBC_NAMESPACE::coshf(FPBits(0x42d00008U).get_val()), FE_OVERFLOW); EXPECT_MATH_ERRNO(ERANGE); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcCoshfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(max_denormal)); +} + +TEST_F(LlvmLibcCoshfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(max_denormal)); +} + +TEST_F(LlvmLibcCoshfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/cospif_test.cpp b/libc/test/src/math/smoke/cospif_test.cpp index 007c4c45e3b1570..bf6d86bcfe623af 100644 --- a/libc/test/src/math/smoke/cospif_test.cpp +++ b/libc/test/src/math/smoke/cospif_test.cpp @@ -32,3 +32,30 @@ TEST_F(LlvmLibcCospifTest, SpecialNumbers) { EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cospif(neg_inf)); EXPECT_MATH_ERRNO(EDOM); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcCospifTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(max_denormal)); +} + +TEST_F(LlvmLibcCospifTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(max_denormal)); +} + +TEST_F(LlvmLibcCospifTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(min_denormal)); + EXPECT_FP_EQ(1.0f, 
LIBC_NAMESPACE::cospif(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/erff_test.cpp b/libc/test/src/math/smoke/erff_test.cpp index 8a970f3a4b7ed19..7d2c1013752c7c0 100644 --- a/libc/test/src/math/smoke/erff_test.cpp +++ b/libc/test/src/math/smoke/erff_test.cpp @@ -23,3 +23,30 @@ TEST_F(LlvmLibcErffTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::erff(zero)); EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::erff(neg_zero)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcErffTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(min_denormal)); + EXPECT_FP_EQ(0x1.20dd72p-126f, LIBC_NAMESPACE::erff(max_denormal)); +} + +TEST_F(LlvmLibcErffTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(max_denormal)); +} + +TEST_F(LlvmLibcErffTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/exp10_test.cpp b/libc/test/src/math/smoke/exp10_test.cpp index 282ddc987b49933..ca9fc359edeb5a4 100644 --- a/libc/test/src/math/smoke/exp10_test.cpp +++ b/libc/test/src/math/smoke/exp10_test.cpp @@ -32,3 +32,30 @@ TEST_F(LlvmLibcExp10Test, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(100.0, LIBC_NAMESPACE::exp10(2.0)); EXPECT_FP_EQ_ALL_ROUNDING(1000.0, LIBC_NAMESPACE::exp10(3.0)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExp10Test, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(max_denormal)); +} + +TEST_F(LlvmLibcExp10Test, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(min_denormal)); + EXPECT_FP_EQ(1.0, 
LIBC_NAMESPACE::exp10(max_denormal)); +} + +TEST_F(LlvmLibcExp10Test, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/exp10f_test.cpp b/libc/test/src/math/smoke/exp10f_test.cpp index 9fb15ae75348bb8..bcbfc96efd72689 100644 --- a/libc/test/src/math/smoke/exp10f_test.cpp +++ b/libc/test/src/math/smoke/exp10f_test.cpp @@ -54,3 +54,30 @@ TEST_F(LlvmLibcExp10fTest, Overflow) { inf, LIBC_NAMESPACE::exp10f(FPBits(0x43000001U).get_val()), FE_OVERFLOW); EXPECT_MATH_ERRNO(ERANGE); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExp10fTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(max_denormal)); +} + +TEST_F(LlvmLibcExp10fTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(max_denormal)); +} + +TEST_F(LlvmLibcExp10fTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/exp2_test.cpp b/libc/test/src/math/smoke/exp2_test.cpp index d148d27fad38dc5..d97a384367a09f9 100644 --- a/libc/test/src/math/smoke/exp2_test.cpp +++ b/libc/test/src/math/smoke/exp2_test.cpp @@ -31,3 +31,30 @@ TEST_F(LlvmLibcExp2Test, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(4.0, LIBC_NAMESPACE::exp2(2.0)); EXPECT_FP_EQ_ALL_ROUNDING(0.25, LIBC_NAMESPACE::exp2(-2.0)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExp2Test, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(max_denormal)); +} + 
+TEST_F(LlvmLibcExp2Test, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(max_denormal)); +} + +TEST_F(LlvmLibcExp2Test, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/exp2f_test.cpp b/libc/test/src/math/smoke/exp2f_test.cpp index 39228eb2f6d8ba2..d9cdecbf0fe9ba4 100644 --- a/libc/test/src/math/smoke/exp2f_test.cpp +++ b/libc/test/src/math/smoke/exp2f_test.cpp @@ -55,3 +55,30 @@ TEST_F(LlvmLibcExp2fTest, Overflow) { inf, LIBC_NAMESPACE::exp2f(FPBits(0x43000001U).get_val()), FE_OVERFLOW); EXPECT_MATH_ERRNO(ERANGE); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExp2fTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(max_denormal)); +} + +TEST_F(LlvmLibcExp2fTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(max_denormal)); +} + +TEST_F(LlvmLibcExp2fTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/exp2m1f_test.cpp b/libc/test/src/math/smoke/exp2m1f_test.cpp index 2df435385247289..4657d088f07a893 100644 --- a/libc/test/src/math/smoke/exp2m1f_test.cpp +++ b/libc/test/src/math/smoke/exp2m1f_test.cpp @@ -61,3 +61,30 @@ TEST_F(LlvmLibcExp2m1fTest, Underflow) { FE_UNDERFLOW); EXPECT_MATH_ERRNO(ERANGE); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExp2m1fTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(min_denormal)); + 
EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(max_denormal)); +} + +TEST_F(LlvmLibcExp2m1fTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(max_denormal)); +} + +TEST_F(LlvmLibcExp2m1fTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/exp_test.cpp b/libc/test/src/math/smoke/exp_test.cpp index 5fe6f3e92f4a6a6..d2467ff8838969f 100644 --- a/libc/test/src/math/smoke/exp_test.cpp +++ b/libc/test/src/math/smoke/exp_test.cpp @@ -27,3 +27,30 @@ TEST_F(LlvmLibcExpTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(1.0, LIBC_NAMESPACE::exp(0.0)); EXPECT_FP_EQ_ALL_ROUNDING(1.0, LIBC_NAMESPACE::exp(-0.0)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExpTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(max_denormal)); +} + +TEST_F(LlvmLibcExpTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(max_denormal)); +} + +TEST_F(LlvmLibcExpTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(min_denormal)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/expf_test.cpp b/libc/test/src/math/smoke/expf_test.cpp index b954125afd7bba0..11181ed1402c9ee 100644 --- a/libc/test/src/math/smoke/expf_test.cpp +++ b/libc/test/src/math/smoke/expf_test.cpp @@ -50,3 +50,30 @@ TEST_F(LlvmLibcExpfTest, Overflow) { inf, LIBC_NAMESPACE::expf(FPBits(0x42d00008U).get_val()), FE_OVERFLOW); EXPECT_MATH_ERRNO(ERANGE); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExpfTest, 
FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(max_denormal)); +} + +TEST_F(LlvmLibcExpfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(max_denormal)); +} + +TEST_F(LlvmLibcExpfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(min_denormal)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/expm1_test.cpp b/libc/test/src/math/smoke/expm1_test.cpp index bafdbda8af03bdb..cebd2d757606b0f 100644 --- a/libc/test/src/math/smoke/expm1_test.cpp +++ b/libc/test/src/math/smoke/expm1_test.cpp @@ -33,3 +33,30 @@ TEST_F(LlvmLibcExpm1Test, SpecialNumbers) { // log(2^-54) EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::expm1(-0x1.2b708872320e2p5)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExpm1Test, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(max_denormal)); +} + +TEST_F(LlvmLibcExpm1Test, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(max_denormal)); +} + +TEST_F(LlvmLibcExpm1Test, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/expm1f_test.cpp b/libc/test/src/math/smoke/expm1f_test.cpp index 03b6e47b7c3bc44..f4138aa05ba7e3a 100644 --- a/libc/test/src/math/smoke/expm1f_test.cpp +++ b/libc/test/src/math/smoke/expm1f_test.cpp @@ -50,3 +50,30 @@ TEST_F(LlvmLibcExpm1fTest, Overflow) { inf, LIBC_NAMESPACE::expm1f(FPBits(0x42d00008U).get_val()), FE_OVERFLOW); EXPECT_MATH_ERRNO(ERANGE); } + +#ifdef 
LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcExpm1fTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(max_denormal)); +} + +TEST_F(LlvmLibcExpm1fTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(max_denormal)); +} + +TEST_F(LlvmLibcExpm1fTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/hypotf_test.cpp b/libc/test/src/math/smoke/hypotf_test.cpp index 768e7f75e9d678e..62399489987e7ea 100644 --- a/libc/test/src/math/smoke/hypotf_test.cpp +++ b/libc/test/src/math/smoke/hypotf_test.cpp @@ -15,3 +15,37 @@ using LlvmLibcHypotfTest = HypotTestTemplate; TEST_F(LlvmLibcHypotfTest, SpecialNumbers) { test_special_numbers(&LIBC_NAMESPACE::hypotf); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcHypotfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, max_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, min_denormal)); + EXPECT_FP_EQ(0x1.6a09e4p-126f, + LIBC_NAMESPACE::hypotf(max_denormal, max_denormal)); +} + +TEST_F(LlvmLibcHypotfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, max_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, max_denormal)); +} + +TEST_F(LlvmLibcHypotfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, 
LIBC_NAMESPACE::hypotf(min_denormal, min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, max_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/log10_test.cpp b/libc/test/src/math/smoke/log10_test.cpp index e03416ae20c8f37..9f159f282aad86c 100644 --- a/libc/test/src/math/smoke/log10_test.cpp +++ b/libc/test/src/math/smoke/log10_test.cpp @@ -33,3 +33,29 @@ TEST_F(LlvmLibcLog10Test, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(static_cast(i), LIBC_NAMESPACE::log10(x)); } } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcLog10Test, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(-0x1.434e6420f4374p8, LIBC_NAMESPACE::log10(min_denormal)); +} + +TEST_F(LlvmLibcLog10Test, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log10(min_denormal)); +} + +TEST_F(LlvmLibcLog10Test, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log10(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/log10f_test.cpp b/libc/test/src/math/smoke/log10f_test.cpp index 2524545e0181236..4e3bf654ca918a9 100644 --- a/libc/test/src/math/smoke/log10f_test.cpp +++ b/libc/test/src/math/smoke/log10f_test.cpp @@ -32,3 +32,29 @@ TEST_F(LlvmLibcLog10fTest, SpecialNumbers) { EXPECT_FP_EQ_ALL_ROUNDING(static_cast(i), LIBC_NAMESPACE::log10f(x)); } } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcLog10fTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(-0x1.66d3e7bd9a403p5f, LIBC_NAMESPACE::log10f(min_denormal)); +} + +TEST_F(LlvmLibcLog10fTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log10f(min_denormal)); +} + 
+TEST_F(LlvmLibcLog10fTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log10f(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/log1p_test.cpp b/libc/test/src/math/smoke/log1p_test.cpp index 63237f3259b2151..eba65f56df73964 100644 --- a/libc/test/src/math/smoke/log1p_test.cpp +++ b/libc/test/src/math/smoke/log1p_test.cpp @@ -27,3 +27,27 @@ TEST_F(LlvmLibcLog1pTest, SpecialNumbers) { EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, LIBC_NAMESPACE::log1p(-1.0), FE_DIVBYZERO); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcLog1pTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal)); +} + +TEST_F(LlvmLibcLog1pTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal)); +} + +TEST_F(LlvmLibcLog1pTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/log1pf_test.cpp b/libc/test/src/math/smoke/log1pf_test.cpp index c14d65529469792..1b0a1d589e684b0 100644 --- a/libc/test/src/math/smoke/log1pf_test.cpp +++ b/libc/test/src/math/smoke/log1pf_test.cpp @@ -26,3 +26,27 @@ TEST_F(LlvmLibcLog1pfTest, SpecialNumbers) { EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, LIBC_NAMESPACE::log1pf(-1.0f), FE_DIVBYZERO); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcLog1pfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::log1pf(min_denormal)); +} + +TEST_F(LlvmLibcLog1pfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::log1pf(min_denormal)); +} + +TEST_F(LlvmLibcLog1pfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::log1pf(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/log2_test.cpp 
b/libc/test/src/math/smoke/log2_test.cpp index 89d8e5651091185..1570d60556df2c3 100644 --- a/libc/test/src/math/smoke/log2_test.cpp +++ b/libc/test/src/math/smoke/log2_test.cpp @@ -27,3 +27,29 @@ TEST_F(LlvmLibcLog2Test, SpecialNumbers) { EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log2(-1.0), FE_INVALID); EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log2(1.0)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcLog2Test, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(-1074.0, LIBC_NAMESPACE::log2(min_denormal)); +} + +TEST_F(LlvmLibcLog2Test, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log2(min_denormal)); +} + +TEST_F(LlvmLibcLog2Test, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log2(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/log2f_test.cpp b/libc/test/src/math/smoke/log2f_test.cpp index 00bfb7c4abad670..67b2c5b2db13d12 100644 --- a/libc/test/src/math/smoke/log2f_test.cpp +++ b/libc/test/src/math/smoke/log2f_test.cpp @@ -28,3 +28,28 @@ TEST_F(LlvmLibcLog2fTest, SpecialNumbers) { EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log2f(-1.0f), FE_INVALID); EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log2f(1.0f)); } +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcLog2fTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(-149.0f, LIBC_NAMESPACE::log2f(min_denormal)); +} + +TEST_F(LlvmLibcLog2fTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log2f(min_denormal)); +} + +TEST_F(LlvmLibcLog2fTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log2f(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/log_test.cpp b/libc/test/src/math/smoke/log_test.cpp index 
e7897add575fade..20b974d7e167d74 100644 --- a/libc/test/src/math/smoke/log_test.cpp +++ b/libc/test/src/math/smoke/log_test.cpp @@ -26,3 +26,29 @@ TEST_F(LlvmLibcLogTest, SpecialNumbers) { EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log(-1.0), FE_INVALID); EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log(1.0)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcLogTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(-0x1.74385446d71c3p9, LIBC_NAMESPACE::log(min_denormal)); +} + +TEST_F(LlvmLibcLogTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log(min_denormal)); +} + +TEST_F(LlvmLibcLogTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::log(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/logf_test.cpp b/libc/test/src/math/smoke/logf_test.cpp index a27206027614525..1a3102ae2b14101 100644 --- a/libc/test/src/math/smoke/logf_test.cpp +++ b/libc/test/src/math/smoke/logf_test.cpp @@ -27,3 +27,28 @@ TEST_F(LlvmLibcLogfTest, SpecialNumbers) { EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::logf(-1.0f), FE_INVALID); EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::logf(1.0f)); } +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcLogfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(-0x1.9d1d9fccf477p6f, LIBC_NAMESPACE::logf(min_denormal)); +} + +TEST_F(LlvmLibcLogfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::logf(min_denormal)); +} + +TEST_F(LlvmLibcLogfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(), + LIBC_NAMESPACE::logf(min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/pow_test.cpp b/libc/test/src/math/smoke/pow_test.cpp index 7f0136d783c6ba0..f9db7f102962b93 100644 --- 
a/libc/test/src/math/smoke/pow_test.cpp +++ b/libc/test/src/math/smoke/pow_test.cpp @@ -190,3 +190,30 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) { } } } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcPowTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(-min_denormal, 0.5)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(2.0, min_denormal)); +} + +TEST_F(LlvmLibcPowTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::pow(-min_denormal, 0.5)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(2.0, min_denormal)); +} + +TEST_F(LlvmLibcPowTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::pow(-min_denormal, 0.5)); + EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(2.0, min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/powf_test.cpp b/libc/test/src/math/smoke/powf_test.cpp index a0f66f2733a1ea9..9cc95ce0baef9fc 100644 --- a/libc/test/src/math/smoke/powf_test.cpp +++ b/libc/test/src/math/smoke/powf_test.cpp @@ -194,3 +194,30 @@ TEST_F(LlvmLibcPowfTest, SpecialNumbers) { EXPECT_FP_EQ(-0.0f, LIBC_NAMESPACE::powf(-0.015625f, 25.0f)); EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::powf(-0.015625f, 26.0f)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcPowfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf(-min_denormal, 0.5f)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::powf(2.0f, min_denormal)); +} + +TEST_F(LlvmLibcPowfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::powf(-min_denormal, 0.5f)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::powf(2.0f, min_denormal)); +} + +TEST_F(LlvmLibcPowfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::powf(-min_denormal, 0.5f)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::powf(2.0f, min_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/sin_test.cpp 
b/libc/test/src/math/smoke/sin_test.cpp index 16ced68709ca755..7dd1b7fda625b0d 100644 --- a/libc/test/src/math/smoke/sin_test.cpp +++ b/libc/test/src/math/smoke/sin_test.cpp @@ -24,3 +24,30 @@ TEST_F(LlvmLibcSinTest, SpecialNumbers) { EXPECT_FP_EQ(min_normal, LIBC_NAMESPACE::sin(min_normal)); EXPECT_FP_EQ(min_denormal, LIBC_NAMESPACE::sin(min_denormal)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcSinTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(max_denormal)); +} + +TEST_F(LlvmLibcSinTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(max_denormal)); +} + +TEST_F(LlvmLibcSinTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/sinf_test.cpp b/libc/test/src/math/smoke/sinf_test.cpp index 1bf6eaa8b78d7d3..776c66dcb37bdee 100644 --- a/libc/test/src/math/smoke/sinf_test.cpp +++ b/libc/test/src/math/smoke/sinf_test.cpp @@ -35,3 +35,30 @@ TEST_F(LlvmLibcSinfTest, SpecialNumbers) { EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinf(neg_inf)); EXPECT_MATH_ERRNO(EDOM); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcSinfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(max_denormal)); +} + +TEST_F(LlvmLibcSinfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(max_denormal)); +} + +TEST_F(LlvmLibcSinfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(max_denormal)); 
+} + +#endif diff --git a/libc/test/src/math/smoke/sinhf_test.cpp b/libc/test/src/math/smoke/sinhf_test.cpp index 635a10627a21096..3cc0656967581ab 100644 --- a/libc/test/src/math/smoke/sinhf_test.cpp +++ b/libc/test/src/math/smoke/sinhf_test.cpp @@ -62,3 +62,30 @@ TEST_F(LlvmLibcSinhfTest, Overflow) { inf, LIBC_NAMESPACE::sinhf(FPBits(0x42d00008U).get_val()), FE_OVERFLOW); EXPECT_MATH_ERRNO(ERANGE); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcSinhfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(max_denormal)); +} + +TEST_F(LlvmLibcSinhfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(max_denormal)); +} + +TEST_F(LlvmLibcSinhfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/sinpif_test.cpp b/libc/test/src/math/smoke/sinpif_test.cpp index 0918294ab3611c2..11bda0b6b28cc77 100644 --- a/libc/test/src/math/smoke/sinpif_test.cpp +++ b/libc/test/src/math/smoke/sinpif_test.cpp @@ -41,3 +41,30 @@ TEST_F(LlvmLibcSinpifTest, Integers) { EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif(0x1.cp+106)); EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif(0x1.cp+21)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcSinpifTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(min_denormal)); + EXPECT_FP_EQ(0x1.921fb2p-125f, LIBC_NAMESPACE::sinpif(max_denormal)); +} + +TEST_F(LlvmLibcSinpifTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(max_denormal)); +} + +TEST_F(LlvmLibcSinpifTest, FTZDAZMode) { + ModifyMXCSR 
mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/tan_test.cpp b/libc/test/src/math/smoke/tan_test.cpp index 498dba76b6e7195..aa5c23d65886d29 100644 --- a/libc/test/src/math/smoke/tan_test.cpp +++ b/libc/test/src/math/smoke/tan_test.cpp @@ -24,3 +24,30 @@ TEST_F(LlvmLibcTanTest, SpecialNumbers) { EXPECT_FP_EQ(min_normal, LIBC_NAMESPACE::tan(min_normal)); EXPECT_FP_EQ(min_denormal, LIBC_NAMESPACE::tan(min_denormal)); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcTanTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(max_denormal)); +} + +TEST_F(LlvmLibcTanTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(max_denormal)); +} + +TEST_F(LlvmLibcTanTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(min_denormal)); + EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/tanf_test.cpp b/libc/test/src/math/smoke/tanf_test.cpp index b90c5da8741892a..93fbfded3f66a18 100644 --- a/libc/test/src/math/smoke/tanf_test.cpp +++ b/libc/test/src/math/smoke/tanf_test.cpp @@ -35,3 +35,30 @@ TEST_F(LlvmLibcTanfTest, SpecialNumbers) { EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::tanf(neg_inf)); EXPECT_MATH_ERRNO(EDOM); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcTanfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(max_denormal)); +} + +TEST_F(LlvmLibcTanfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(min_denormal)); + EXPECT_FP_EQ(0.0f, 
LIBC_NAMESPACE::tanf(max_denormal)); +} + +TEST_F(LlvmLibcTanfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(max_denormal)); +} + +#endif diff --git a/libc/test/src/math/smoke/tanhf_test.cpp b/libc/test/src/math/smoke/tanhf_test.cpp index 748e6fe8c62693d..3b7faa81dac2eac 100644 --- a/libc/test/src/math/smoke/tanhf_test.cpp +++ b/libc/test/src/math/smoke/tanhf_test.cpp @@ -35,3 +35,30 @@ TEST_F(LlvmLibcTanhfTest, SpecialNumbers) { EXPECT_FP_EQ(-1.0f, LIBC_NAMESPACE::tanhf(neg_inf)); EXPECT_MATH_ERRNO(0); } + +#ifdef LIBC_TEST_FTZ_DAZ + +using namespace LIBC_NAMESPACE::testing; + +TEST_F(LlvmLibcTanhfTest, FTZMode) { + ModifyMXCSR mxcsr(FTZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(max_denormal)); +} + +TEST_F(LlvmLibcTanhfTest, DAZMode) { + ModifyMXCSR mxcsr(DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(max_denormal)); +} + +TEST_F(LlvmLibcTanhfTest, FTZDAZMode) { + ModifyMXCSR mxcsr(FTZ | DAZ); + + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(min_denormal)); + EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(max_denormal)); +} + +#endif From 266ff98cba44b9e969e6894eaa30d4fe37647210 Mon Sep 17 00:00:00 2001 From: Shih-Po Hung Date: Sat, 26 Oct 2024 23:18:50 +0800 Subject: [PATCH 091/425] [LV][VPlan] Use VF VPValue in VPVectorPointerRecipe (#110974) Refactors VPVectorPointerRecipe to use the VF VPValue to obtain the runtime VF, similar to #95305. Since only reverse vector pointers require the runtime VF, the patch sets VPUnrollPart::PartOpIndex to 1 for vector pointers and 2 for reverse vector pointers. As a result, the generation of reverse vector pointers is moved into a separate recipe. 
--- .../Transforms/Vectorize/LoopVectorize.cpp | 13 +- llvm/lib/Transforms/Vectorize/VPlan.h | 64 +++++++-- .../Transforms/Vectorize/VPlanAnalysis.cpp | 7 +- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 80 +++++++---- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 4 +- llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 + .../AArch64/sve-vector-reverse.ll | 130 ++++++++---------- .../RISCV/riscv-vector-reverse.ll | 20 +-- ...-force-tail-with-evl-reverse-load-store.ll | 120 ++++++++-------- ...orize-force-tail-with-evl-uniform-store.ll | 6 +- .../LoopVectorize/reverse_induction.ll | 24 ++-- .../vplan-sink-scalars-and-merge.ll | 5 +- 12 files changed, 267 insertions(+), 207 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 865f5e3d2e588da..88086f24dfdce2b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4492,6 +4492,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, case VPDef::VPInstructionSC: case VPDef::VPCanonicalIVPHISC: case VPDef::VPVectorPointerSC: + case VPDef::VPReverseVectorPointerSC: case VPDef::VPExpandSCEVSC: case VPDef::VPEVLBasedIVPHISC: case VPDef::VPPredInstPHISC: @@ -8278,9 +8279,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef Operands, if (Consecutive) { auto *GEP = dyn_cast( Ptr->getUnderlyingValue()->stripPointerCasts()); - auto *VectorPtr = new VPVectorPointerRecipe( - Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false, - I->getDebugLoc()); + VPSingleDefRecipe *VectorPtr; + if (Reverse) + VectorPtr = new VPReverseVectorPointerRecipe( + Ptr, &Plan.getVF(), getLoadStoreType(I), + GEP ? GEP->isInBounds() : false, I->getDebugLoc()); + else + VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I), + GEP ? 
GEP->isInBounds() : false, + I->getDebugLoc()); Builder.getInsertBlock()->appendRecipe(VectorPtr); Ptr = VectorPtr; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f2e6729a2e26596..a34e34a0d71f1ec 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -905,6 +905,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPReplicateSC: case VPRecipeBase::VPScalarIVStepsSC: case VPRecipeBase::VPVectorPointerSC: + case VPRecipeBase::VPReverseVectorPointerSC: case VPRecipeBase::VPWidenCallSC: case VPRecipeBase::VPWidenCanonicalIVSC: case VPRecipeBase::VPWidenCastSC: @@ -1110,6 +1111,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe { R->getVPDefID() == VPRecipeBase::VPWidenGEPSC || R->getVPDefID() == VPRecipeBase::VPWidenCastSC || R->getVPDefID() == VPRecipeBase::VPReplicateSC || + R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC || R->getVPDefID() == VPRecipeBase::VPVectorPointerSC; } @@ -1910,20 +1912,64 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags { #endif }; -/// A recipe to compute the pointers for widened memory accesses of IndexTy for -/// all parts. If IsReverse is true, compute pointers for accessing the input in -/// reverse order per part. +/// A recipe to compute the pointers for widened memory accesses of IndexTy +/// in reverse order. 
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags, + public VPUnrollPartAccessor<2> { + Type *IndexedTy; + +public: + VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, + bool IsInBounds, DebugLoc DL) + : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC, + ArrayRef({Ptr, VF}), + GEPFlagsTy(IsInBounds), DL), + IndexedTy(IndexedTy) {} + + VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC) + + VPValue *getVFValue() { return getOperand(1); } + const VPValue *getVFValue() const { return getOperand(1); } + + void execute(VPTransformState &State) override; + + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + + /// Returns true if the recipe only uses the first part of operand \p Op. + bool onlyFirstPartUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + assert(getNumOperands() <= 2 && "must have at most two operands"); + return true; + } + + VPReverseVectorPointerRecipe *clone() override { + return new VPReverseVectorPointerRecipe( + getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc()); + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + +/// A recipe to compute the pointers for widened memory accesses of IndexTy. 
class VPVectorPointerRecipe : public VPRecipeWithIRFlags, public VPUnrollPartAccessor<1> { Type *IndexedTy; - bool IsReverse; public: - VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, - bool IsInBounds, DebugLoc DL) + VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds, + DebugLoc DL) : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef(Ptr), GEPFlagsTy(IsInBounds), DL), - IndexedTy(IndexedTy), IsReverse(IsReverse) {} + IndexedTy(IndexedTy) {} VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC) @@ -1944,8 +1990,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags, } VPVectorPointerRecipe *clone() override { - return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse, - isInBounds(), getDebugLoc()); + return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(), + getDebugLoc()); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 3eb5f3f40f842ad..8b8ab6be99b0d57 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -263,9 +263,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { [](const auto *R) { return R->getScalarType(); }) .Case([this](const VPRecipeBase *R) { - return inferScalarType(R->getOperand(0)); - }) + VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>( + [this](const VPRecipeBase *R) { + return inferScalarType(R->getOperand(0)); + }) .Case( diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 2080b77157b6ca2..b1e6086398c4df7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -162,6 +162,7 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPDerivedIVSC: case VPPredInstPHISC: case VPScalarCastSC: + case VPReverseVectorPointerSC: return false; case VPInstructionSC: 
return mayWriteToMemory(); @@ -1971,38 +1972,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -void VPVectorPointerRecipe ::execute(VPTransformState &State) { - auto &Builder = State.Builder; - State.setDebugLocFrom(getDebugLoc()); - unsigned CurrentPart = getUnrollPart(*this); +static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, + unsigned CurrentPart, IRBuilderBase &Builder) { // Use i32 for the gep index type when the value is constant, // or query DataLayout for a more suitable index type otherwise. const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout(); - Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0) - ? DL.getIndexType(Builder.getPtrTy(0)) - : Builder.getInt32Ty(); + return IsScalable && (IsReverse || CurrentPart > 0) + ? DL.getIndexType(Builder.getPtrTy(0)) + : Builder.getInt32Ty(); +} + +void VPReverseVectorPointerRecipe::execute(VPTransformState &State) { + auto &Builder = State.Builder; + State.setDebugLocFrom(getDebugLoc()); + unsigned CurrentPart = getUnrollPart(*this); + Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true, + CurrentPart, Builder); + + // The wide store needs to start at the last vector element. 
+ Value *RunTimeVF = State.get(getVFValue(), VPLane(0)); + if (IndexTy != RunTimeVF->getType()) + RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy); + // NumElt = -CurrentPart * RunTimeVF + Value *NumElt = Builder.CreateMul( + ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF); + // LastLane = 1 - RunTimeVF + Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); Value *Ptr = State.get(getOperand(0), VPLane(0)); bool InBounds = isInBounds(); + Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); + ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds); - Value *ResultPtr = nullptr; - if (IsReverse) { - // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. - // RunTimeVF = VScale * VF.getKnownMinValue() - // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue() - Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF); - // NumElt = -CurrentPart * RunTimeVF - Value *NumElt = Builder.CreateMul( - ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF); - // LastLane = 1 - RunTimeVF - Value *LastLane = - Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); - ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds); - ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds); - } else { - Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); - ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); - } + State.set(this, ResultPtr, /*IsScalar*/ true); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent; + printAsOperand(O, SlotTracker); + O << " = reverse-vector-pointer "; + if (isInBounds()) + O << "inbounds "; + printOperands(O, SlotTracker); +} +#endif + +void 
VPVectorPointerRecipe::execute(VPTransformState &State) { + auto &Builder = State.Builder; + State.setDebugLocFrom(getDebugLoc()); + unsigned CurrentPart = getUnrollPart(*this); + Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false, + CurrentPart, Builder); + Value *Ptr = State.get(getOperand(0), VPLane(0)); + bool InBounds = isInBounds(); + + Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart); + Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds); State.set(this, ResultPtr, /*IsScalar*/ true); } @@ -2013,8 +2039,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, O << Indent; printAsOperand(O, SlotTracker); O << " = vector-pointer "; - if (IsReverse) - O << "(reverse) "; printOperands(O, SlotTracker); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index ca78f32506ef715..1e32865e8ee576d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) { // Add operand indicating the part to generate code for, to recipes still // requiring it. 
if (isa(Copy) || + VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) || match(Copy, m_VPInstruction( m_VPValue()))) Copy->addOperand(getConstantVPV(Part)); - if (isa(R)) + if (isa(R)) Copy->setOperand(0, R.getOperand(0)); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 1900182f76e0715..89b3ed72b8eb65f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -346,6 +346,7 @@ class VPDef { VPScalarCastSC, VPScalarIVStepsSC, VPVectorPointerSC, + VPReverseVectorPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll index 81121019efe767c..76562e80fbc4a19 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll @@ -24,43 +24,36 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP4]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[N]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = shl i64 [[TMP9]], 3 -; CHECK-NEXT: [[TMP11:%.*]] = sub i64 1, [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, ptr 
[[TMP8]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = shl i64 [[TMP13]], 3 -; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP12]], align 8 -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP18]], align 8 -; CHECK-NEXT: [[TMP19:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP20:%.*]] = fadd [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = shl i64 [[TMP22]], 3 -; CHECK-NEXT: [[TMP24:%.*]] = sub i64 1, [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 3 -; CHECK-NEXT: [[TMP28:%.*]] = sub i64 0, [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = sub i64 1, [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]] -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP29]] -; CHECK-NEXT: store [[TMP19]], ptr [[TMP25]], align 8 -; CHECK-NEXT: store [[TMP20]], ptr [[TMP31]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[INDEX]], -1 +; 
CHECK-NEXT: [[TMP8:%.*]] = add i64 [[N]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = sub i64 0, [[TMP5]] +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP11]], align 8 +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = fadd [[WIDE_LOAD]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = fadd [[WIDE_LOAD1]], shufflevector ( insertelement ( poison, double 1.000000e+00, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = sub i64 0, [[TMP5]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP5]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]] +; CHECK-NEXT: store [[TMP16]], ptr [[TMP20]], align 8 +; CHECK-NEXT: store [[TMP17]], ptr [[TMP24]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label 
[[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -75,8 +68,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{ ; CHECK-NEXT: [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[I_08]] = add nsw i64 [[I_08_IN]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]] -; CHECK-NEXT: [[TMP33:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP33]], 1.000000e+00 +; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP26]], 1.000000e+00 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]] ; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1 @@ -126,43 +119,36 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[TMP7]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[TMP7]], 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[INDEX]], -1 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[N]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 3 -; CHECK-NEXT: [[TMP14:%.*]] = sub i64 1, [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP17:%.*]] = shl i64 
[[TMP16]], 3 -; CHECK-NEXT: [[TMP18:%.*]] = sub i64 0, [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = sub i64 1, [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i64 [[TMP19]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP15]], align 8 -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP21]], align 8 -; CHECK-NEXT: [[TMP22:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP23:%.*]] = add [[WIDE_LOAD3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP26:%.*]] = shl i64 [[TMP25]], 3 -; CHECK-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP26]] -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP30:%.*]] = shl i64 [[TMP29]], 3 -; CHECK-NEXT: [[TMP31:%.*]] = sub i64 0, [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = sub i64 1, [[TMP30]] -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP31]] -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[TMP33]], i64 [[TMP32]] -; CHECK-NEXT: store [[TMP22]], ptr [[TMP28]], align 8 -; CHECK-NEXT: store [[TMP23]], ptr [[TMP34]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = xor i64 [[INDEX]], -1 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[N]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = sub i64 
1, [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP8]] +; CHECK-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP16]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP14]], align 8 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load , ptr [[TMP18]], align 8 +; CHECK-NEXT: [[TMP19:%.*]] = add [[WIDE_LOAD]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP20:%.*]] = add [[WIDE_LOAD3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = sub i64 0, [[TMP8]] +; CHECK-NEXT: [[TMP25:%.*]] = sub i64 1, [[TMP8]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP25]] +; CHECK-NEXT: store [[TMP19]], ptr [[TMP23]], align 8 +; CHECK-NEXT: store [[TMP20]], ptr [[TMP27]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] @@ -177,8 +163,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-NEXT: [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; 
CHECK-NEXT: [[I_09]] = add nsw i64 [[I_09_IN]], -1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]] -; CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP36]], 1 +; CHECK-NEXT: [[TMP29:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP29]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 9a716f7756072e1..c7bb1ffab23e79e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -54,6 +54,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -74,11 +75,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%1>, ir<1> ; CHECK-NEXT: CLONE 
ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> @@ -138,6 +139,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -158,11 +160,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%add9> = add ir<%13>, ir<1> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%add9> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> @@ -259,6 +261,7 @@ 
define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1 ; CHECK-NEXT: LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1 ; CHECK-NEXT: VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -279,11 +282,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR2]]>, ir<%conv1> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> @@ -343,6 +346,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK-NEXT: VPlan 'Final VPlan for VF={vscale x 4},UF={1}' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; 
CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -363,11 +367,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1> ; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0> ; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%13> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%conv1> = fadd ir<%13>, ir<1.000000e+00> ; CHECK-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx3> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]> ; CHECK-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%conv1> ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]> ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll index c1cf8b0fc541e74..9a001f36da7d4fb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -28,34 +28,30 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 
@llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true) +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 -; IF-EVL-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], -1 -; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]] -; IF-EVL-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; IF-EVL-NEXT: [[TMP12:%.*]] = mul i64 0, [[TMP11]] -; IF-EVL-NEXT: [[TMP13:%.*]] = sub i64 1, [[TMP11]] -; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP12]] -; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP13]] -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP8]] -; IF-EVL-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4 -; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 0, [[TMP18]] -; IF-EVL-NEXT: [[TMP20:%.*]] = sub i64 1, [[TMP18]] -; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP19]] -; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP20]] -; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: call 
void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP22]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], -1 +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]] +; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 0, [[TMP4]] +; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]] +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]] +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]] +; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 0, [[TMP4]] +; IF-EVL-NEXT: [[TMP15:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]] +; IF-EVL-NEXT: [[VP_REVERSE3:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE3]], ptr align 4 [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: 
[[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: @@ -131,49 +127,45 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal ; IF-EVL: vector.body: ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] -; IF-EVL-NEXT: [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true) +; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) ; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]] -; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 ; IF-EVL-NEXT: [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32 -; IF-EVL-NEXT: [[TMP8:%.*]] = add i32 [[OFFSET_IDX3]], 0 +; IF-EVL-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX3]], 0 ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; IF-EVL-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() -; IF-EVL-NEXT: [[TMP10:%.*]] = add zeroinitializer, [[TMP9]] -; 
IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP10]] -; IF-EVL-NEXT: [[TMP11:%.*]] = icmp ule [[VEC_IV]], shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer) -; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[TMP7]], -1 -; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP8]] -; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0 -; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP15:%.*]] = icmp slt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; IF-EVL-NEXT: [[TMP16:%.*]] = select [[TMP11]], [[TMP15]], zeroinitializer -; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]] -; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 -; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 0, [[TMP19]] -; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 1, [[TMP19]] -; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP20]] -; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP21]] -; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP23]], [[VP_REVERSE_MASK]], i32 [[TMP6]]) -; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]] -; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4 
-; IF-EVL-NEXT: [[TMP27:%.*]] = mul i64 0, [[TMP26]] -; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 1, [[TMP26]] -; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP27]] -; IF-EVL-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i64 [[TMP28]] -; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP6]]) -; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP30]], [[VP_REVERSE_MASK6]], i32 [[TMP6]]) -; IF-EVL-NEXT: [[TMP31:%.*]] = zext i32 [[TMP6]] to i64 -; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP31]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP9:%.*]] = add zeroinitializer, [[TMP8]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP9]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule [[VEC_IV]], shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP6]], -1 +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP7]] +; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP15:%.*]] = select [[TMP10]], [[TMP14]], zeroinitializer +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP17:%.*]] = mul i64 0, 
[[TMP4]] +; IF-EVL-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]] +; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], [[VP_REVERSE_MASK]], i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_OP_LOAD4]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP4]] +; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP4]] +; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]] +; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call @llvm.experimental.vp.reverse.nxv4i32( [[VP_REVERSE]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call @llvm.experimental.vp.reverse.nxv4i1( [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP5]]) +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_REVERSE5]], ptr align 4 [[TMP25]], [[VP_REVERSE_MASK6]], i32 [[TMP5]]) +; IF-EVL-NEXT: [[TMP26:%.*]] = zext i32 [[TMP5]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]] -; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; IF-EVL-NEXT: 
[[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] ; IF-EVL: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll index 870925950ae4984..c492b296903e60a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll @@ -38,10 +38,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP16]] -; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i64 0, [[TMP9]] +; CHECK-NEXT: [[TMP18:%.*]] = sub i64 1, [[TMP9]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]] ; CHECK-NEXT: [[VP_REVERSE:%.*]] = call @llvm.experimental.vp.reverse.nxv2i64( zeroinitializer, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP11]]) diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index c1322792071e45a..d983c5138164fc7 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -20,11 +20,11 @@ define i32 
@reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], -1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 -4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> @@ -93,11 +93,11 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i128 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = add i128 [[TMP0]], -1 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[TMP0]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 -4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> @@ -176,11 +176,11 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) { ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]] ; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = add i16 [[TMP4]], -1 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[TMP4]], -1 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 -4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 -4 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 0f3cd9d4ca4d614..446b720ad1ba492 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -1101,6 
+1101,7 @@ exit: define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-LABEL: LV: Checking a loop in 'ptr_induction_remove_dead_recipe' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count @@ -1115,11 +1116,11 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: vector loop: { ; CHECK-NEXT: vector.body: ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION -; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<%3> * ir<-1> +; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<-1> ; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1> ; CHECK-NEXT: EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]> ; CHECK-NEXT: CLONE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1> -; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%ptr.iv.next> +; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%ptr.iv.next>, vp<[[VF]]> ; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]> ; CHECK-NEXT: WIDEN ir<%c.1> = icmp eq ir<%l>, ir<0> ; CHECK-NEXT: EMIT vp<[[NEG:%.+]]> = not ir<%c.1> From a917ae0b4fc0d792ee0e2c512c4ea539f98e1204 Mon Sep 17 00:00:00 2001 From: isuckatcs <65320245+isuckatcs@users.noreply.github.com> Date: Sat, 26 Oct 2024 17:41:55 +0200 Subject: [PATCH 092/425] [analyzer] Fix a crash from element region construction during `ArrayInitLoopExpr` analysis (#113570) This patch generalizes the way element regions are constructed when an `ArrayInitLoopExpr` is being analyzed. Previously the base region of the `ElementRegion` was determined with pattern matching, which led to crashes, when an unhandled pattern was encountered. 
Fixes #112813 --- .../lib/StaticAnalyzer/Core/ExprEngineCXX.cpp | 69 ++++--------------- clang/test/Analysis/array-init-loop.cpp | 38 ++++++++++ 2 files changed, 50 insertions(+), 57 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index c50db1e0e2f863e..ccc3097e8d2f971 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -513,70 +513,25 @@ ProgramStateRef ExprEngine::updateObjectsUnderConstruction( static ProgramStateRef bindRequiredArrayElementToEnvironment(ProgramStateRef State, const ArrayInitLoopExpr *AILE, - const LocationContext *LCtx, SVal Idx) { - // The ctor in this case is guaranteed to be a copy ctor, otherwise we hit a - // compile time error. - // - // -ArrayInitLoopExpr <-- we're here - // |-OpaqueValueExpr - // | `-DeclRefExpr <-- match this - // `-CXXConstructExpr - // `-ImplicitCastExpr - // `-ArraySubscriptExpr - // |-ImplicitCastExpr - // | `-OpaqueValueExpr - // | `-DeclRefExpr - // `-ArrayInitIndexExpr - // - // The resulting expression might look like the one below in an implicit - // copy/move ctor. - // - // ArrayInitLoopExpr <-- we're here - // |-OpaqueValueExpr - // | `-MemberExpr <-- match this - // | (`-CXXStaticCastExpr) <-- move ctor only - // | `-DeclRefExpr - // `-CXXConstructExpr - // `-ArraySubscriptExpr - // |-ImplicitCastExpr - // | `-OpaqueValueExpr - // | `-MemberExpr - // | `-DeclRefExpr - // `-ArrayInitIndexExpr - // - // The resulting expression for a multidimensional array. - // ArrayInitLoopExpr <-- we're here - // |-OpaqueValueExpr - // | `-DeclRefExpr <-- match this - // `-ArrayInitLoopExpr - // |-OpaqueValueExpr - // | `-ArraySubscriptExpr - // | |-ImplicitCastExpr - // | | `-OpaqueValueExpr - // | | `-DeclRefExpr - // | `-ArrayInitIndexExpr - // `-CXXConstructExpr <-- extract this - // ` ... 
- - const auto *OVESrc = AILE->getCommonExpr()->getSourceExpr(); + const LocationContext *LCtx, NonLoc Idx) { + SValBuilder &SVB = State->getStateManager().getSValBuilder(); + MemRegionManager &MRMgr = SVB.getRegionManager(); + ASTContext &Ctx = SVB.getContext(); // HACK: There is no way we can put the index of the array element into the // CFG unless we unroll the loop, so we manually select and bind the required // parameter to the environment. - const auto *CE = + const Expr *SourceArray = AILE->getCommonExpr()->getSourceExpr(); + const auto *Ctor = cast(extractElementInitializerFromNestedAILE(AILE)); - SVal Base = UnknownVal(); - if (const auto *ME = dyn_cast(OVESrc)) - Base = State->getSVal(ME, LCtx); - else if (const auto *DRE = dyn_cast(OVESrc)) - Base = State->getLValue(cast(DRE->getDecl()), LCtx); - else - llvm_unreachable("ArrayInitLoopExpr contains unexpected source expression"); - - SVal NthElem = State->getLValue(CE->getType(), Idx, Base); + const auto *SourceArrayRegion = + cast(State->getSVal(SourceArray, LCtx).getAsRegion()); + const ElementRegion *ElementRegion = + MRMgr.getElementRegion(Ctor->getType(), Idx, SourceArrayRegion, Ctx); - return State->BindExpr(CE->getArg(0), LCtx, NthElem); + return State->BindExpr(Ctor->getArg(0), LCtx, + loc::MemRegionVal(ElementRegion)); } void ExprEngine::handleConstructor(const Expr *E, diff --git a/clang/test/Analysis/array-init-loop.cpp b/clang/test/Analysis/array-init-loop.cpp index 4ab4489fc882f3f..b28468b7f560b2c 100644 --- a/clang/test/Analysis/array-init-loop.cpp +++ b/clang/test/Analysis/array-init-loop.cpp @@ -330,3 +330,41 @@ void no_crash() { } } // namespace crash + +namespace array_subscript_initializer { +struct S { + int x; +}; + +void no_crash() { + S arr[][2] = {{1, 2}}; + + const auto [a, b] = arr[0]; // no-crash + + clang_analyzer_eval(a.x == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(b.x == 2); // expected-warning{{TRUE}} +} +} // namespace array_subscript_initializer + +namespace 
iterator_initializer { +struct S { + int x; +}; + +void no_crash() { + S arr[][2] = {{1, 2}, {3, 4}}; + + int i = 0; + for (const auto [a, b] : arr) { // no-crash + if (i == 0) { + clang_analyzer_eval(a.x == 1); // expected-warning{{TRUE}} + clang_analyzer_eval(b.x == 2); // expected-warning{{TRUE}} + } else { + clang_analyzer_eval(a.x == 3); // expected-warning{{TRUE}} + clang_analyzer_eval(b.x == 4); // expected-warning{{TRUE}} + } + + ++i; + } +} +} // namespace iterator_initializer From b667d161f0a9ff6b29cda0ccdb0081610c1e8b8c Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sat, 26 Oct 2024 09:20:26 -0700 Subject: [PATCH 093/425] [StructuralHash] Refactor (#112621) This is largely NFC, and it prepares for #112638. - Use stable_hash instead of uint64_t - Rename update* to hash* functions. They compute stable_hash locally and return it. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608. --- llvm/include/llvm/IR/StructuralHash.h | 7 +- llvm/lib/IR/StructuralHash.cpp | 133 +++++++++++------- llvm/lib/Transforms/IPO/MergeFunctions.cpp | 6 +- .../MergeFunc/call-and-invoke-with-ranges.ll | 17 ++- 4 files changed, 100 insertions(+), 63 deletions(-) diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 57fb45db8491109..e2e192cc9501b3a 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/StableHashing.h" #include namespace llvm { @@ -21,20 +22,18 @@ namespace llvm { class Function; class Module; -using IRHash = uint64_t; - /// Returns a hash of the function \p F. /// \param F The function to hash. /// \param DetailedHash Whether or not to encode additional information in the /// hash. The additional information added into the hash when this flag is set /// to true includes instruction and operand type information. 
-IRHash StructuralHash(const Function &F, bool DetailedHash = false); +stable_hash StructuralHash(const Function &F, bool DetailedHash = false); /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. \param DetailedHash /// Whether or not to encode additional information in the function hashes that /// composed the module hash. -IRHash StructuralHash(const Module &M, bool DetailedHash = false); +stable_hash StructuralHash(const Module &M, bool DetailedHash = false); } // end namespace llvm diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index fb4f33a021a96bc..267a085c5af7053 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -24,61 +24,93 @@ namespace { // by the MergeFunctions pass. class StructuralHashImpl { - uint64_t Hash = 4; + stable_hash Hash = 4; - void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + bool DetailedHash; + + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. + static constexpr stable_hash BlockHeaderHash = 45798; + static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; + static constexpr stable_hash GlobalHeaderHash = 23456; // This will produce different values on 32-bit and 64-bit systens as // hash_combine returns a size_t. However, this is only used for // detailed hashing which, in-tree, only needs to distinguish between // differences in functions. - template void hashArbitaryType(const T &V) { - hash(hash_combine(V)); + // TODO: This is not stable. 
+ template stable_hash hashArbitaryType(const T &V) { + return hash_combine(V); } - void hashType(Type *ValueType) { - hash(ValueType->getTypeID()); + stable_hash hashType(Type *ValueType) { + SmallVector Hashes; + Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) - hash(ValueType->getIntegerBitWidth()); + Hashes.emplace_back(ValueType->getIntegerBitWidth()); + return stable_hash_combine(Hashes); } public: - StructuralHashImpl() = default; - - void updateOperand(Value *Operand) { - hashType(Operand->getType()); - - // The cases enumerated below are not exhaustive and are only aimed to - // get decent coverage over the function. - if (ConstantInt *ConstInt = dyn_cast(Operand)) { - hashArbitaryType(ConstInt->getValue()); - } else if (ConstantFP *ConstFP = dyn_cast(Operand)) { - hashArbitaryType(ConstFP->getValue()); - } else if (Argument *Arg = dyn_cast(Operand)) { - hash(Arg->getArgNo()); - } else if (Function *Func = dyn_cast(Operand)) { + StructuralHashImpl() = delete; + explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + + stable_hash hashConstant(Constant *C) { + SmallVector Hashes; + // TODO: hashArbitaryType() is not stable. + if (ConstantInt *ConstInt = dyn_cast(C)) { + Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); + } else if (ConstantFP *ConstFP = dyn_cast(C)) { + Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); + } else if (Function *Func = dyn_cast(C)) { // Hashing the name will be deterministic as LLVM's hashing infrastructure // has explicit support for hashing strings and will not simply hash // the pointer. - hashArbitaryType(Func->getName()); + Hashes.emplace_back(hashArbitaryType(Func->getName())); } + + return stable_hash_combine(Hashes); + } + + stable_hash hashValue(Value *V) { + // Check constant and return its hash. + Constant *C = dyn_cast(V); + if (C) + return hashConstant(C); + + // Hash argument number. 
+ SmallVector Hashes; + if (Argument *Arg = dyn_cast(V)) + Hashes.emplace_back(Arg->getArgNo()); + + return stable_hash_combine(Hashes); } - void updateInstruction(const Instruction &Inst, bool DetailedHash) { - hash(Inst.getOpcode()); + stable_hash hashOperand(Value *Operand) { + SmallVector Hashes; + Hashes.emplace_back(hashType(Operand->getType())); + Hashes.emplace_back(hashValue(Operand)); + return stable_hash_combine(Hashes); + } + + stable_hash hashInstruction(const Instruction &Inst) { + SmallVector Hashes; + Hashes.emplace_back(Inst.getOpcode()); if (!DetailedHash) - return; + return stable_hash_combine(Hashes); - hashType(Inst.getType()); + Hashes.emplace_back(hashType(Inst.getType())); // Handle additional properties of specific instructions that cause // semantic differences in the IR. if (const auto *ComparisonInstruction = dyn_cast(&Inst)) - hash(ComparisonInstruction->getPredicate()); + Hashes.emplace_back(ComparisonInstruction->getPredicate()); for (const auto &Op : Inst.operands()) - updateOperand(Op); + Hashes.emplace_back(hashOperand(Op)); + + return stable_hash_combine(Hashes); } // A function hash is calculated by considering only the number of arguments @@ -97,15 +129,17 @@ class StructuralHashImpl { // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. - void update(const Function &F, bool DetailedHash) { + void update(const Function &F) { // Declarations don't affect analyses. 
if (F.isDeclaration()) return; - hash(0x62642d6b6b2d6b72); // Function header + SmallVector Hashes; + Hashes.emplace_back(Hash); + Hashes.emplace_back(FunctionHeaderHash); - hash(F.isVarArg()); - hash(F.arg_size()); + Hashes.emplace_back(F.isVarArg()); + Hashes.emplace_back(F.arg_size()); SmallVector BBs; SmallPtrSet VisitedBBs; @@ -118,17 +152,17 @@ class StructuralHashImpl { while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); - // This random value acts as a block header, as otherwise the partition of - // opcodes into BBs wouldn't affect the hash, only the order of the - // opcodes - hash(45798); + Hashes.emplace_back(BlockHeaderHash); for (auto &Inst : *BB) - updateInstruction(Inst, DetailedHash); + Hashes.emplace_back(hashInstruction(Inst)); for (const BasicBlock *Succ : successors(BB)) if (VisitedBBs.insert(Succ).second) BBs.push_back(Succ); } + + // Update the combined hash in place. + Hash = stable_hash_combine(Hashes); } void update(const GlobalVariable &GV) { @@ -137,15 +171,20 @@ class StructuralHashImpl { // we ignore anything with the `.llvm` prefix if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) return; - hash(23456); // Global header - hash(GV.getValueType()->getTypeID()); + SmallVector Hashes; + Hashes.emplace_back(Hash); + Hashes.emplace_back(GlobalHeaderHash); + Hashes.emplace_back(GV.getValueType()->getTypeID()); + + // Update the combined hash in place. 
+ Hash = stable_hash_combine(Hashes); } - void update(const Module &M, bool DetailedHash) { + void update(const Module &M) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F, DetailedHash); + update(F); } uint64_t getHash() const { return Hash; } @@ -153,14 +192,14 @@ class StructuralHashImpl { } // namespace -IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { - StructuralHashImpl H; - H.update(F, DetailedHash); +stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { + StructuralHashImpl H(DetailedHash); + H.update(F); return H.getHash(); } -IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { - StructuralHashImpl H; - H.update(M, DetailedHash); +stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { + StructuralHashImpl H(DetailedHash); + H.update(M); return H.getHash(); } diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index b50a700e09038f1..ad16b0b3501495e 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -172,14 +172,14 @@ namespace { class FunctionNode { mutable AssertingVH F; - IRHash Hash; + stable_hash Hash; public: // Note the hash is recalculated potentially multiple times, but it is cheap. FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {} Function *getFunc() const { return F; } - IRHash getHash() const { return Hash; } + stable_hash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. @@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) { // All functions in the module, ordered by hash. Functions with a unique // hash value are easily eliminated. 
- std::vector> HashedFuncs; + std::vector> HashedFuncs; for (Function &Func : M) { if (isEligibleForMerging(Func)) { HashedFuncs.push_back({StructuralHash(Func), &Func}); diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll index e7718ca84d31657..0ceb363a67b1fa5 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll @@ -63,6 +63,14 @@ lpad: resume { ptr, i32 } zeroinitializer } +define i8 @call_with_same_range() { +; CHECK-LABEL: @call_with_same_range +; CHECK: tail call i8 @call_with_range + bitcast i8 0 to i8 + %out = call i8 @dummy(), !range !0 + ret i8 %out +} + define i8 @invoke_with_same_range() personality ptr undef { ; CHECK-LABEL: @invoke_with_same_range() ; CHECK: tail call i8 @invoke_with_range() @@ -76,15 +84,6 @@ lpad: resume { ptr, i32 } zeroinitializer } -define i8 @call_with_same_range() { -; CHECK-LABEL: @call_with_same_range -; CHECK: tail call i8 @call_with_range - bitcast i8 0 to i8 - %out = call i8 @dummy(), !range !0 - ret i8 %out -} - - declare i8 @dummy(); declare i32 @__gxx_personality_v0(...) From 96723756231e316e68b0dacb104862eecbee01a2 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sat, 26 Oct 2024 09:55:21 -0700 Subject: [PATCH 094/425] Revert "[StructuralHash] Refactor (#112621)" This reverts commit b667d161f0a9ff6b29cda0ccdb0081610c1e8b8c. 
--- llvm/include/llvm/IR/StructuralHash.h | 7 +- llvm/lib/IR/StructuralHash.cpp | 133 +++++++----------- llvm/lib/Transforms/IPO/MergeFunctions.cpp | 6 +- .../MergeFunc/call-and-invoke-with-ranges.ll | 17 +-- 4 files changed, 63 insertions(+), 100 deletions(-) diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index e2e192cc9501b3a..57fb45db8491109 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,7 +14,6 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H -#include "llvm/ADT/StableHashing.h" #include namespace llvm { @@ -22,18 +21,20 @@ namespace llvm { class Function; class Module; +using IRHash = uint64_t; + /// Returns a hash of the function \p F. /// \param F The function to hash. /// \param DetailedHash Whether or not to encode additional information in the /// hash. The additional information added into the hash when this flag is set /// to true includes instruction and operand type information. -stable_hash StructuralHash(const Function &F, bool DetailedHash = false); +IRHash StructuralHash(const Function &F, bool DetailedHash = false); /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. \param DetailedHash /// Whether or not to encode additional information in the function hashes that /// composed the module hash. -stable_hash StructuralHash(const Module &M, bool DetailedHash = false); +IRHash StructuralHash(const Module &M, bool DetailedHash = false); } // end namespace llvm diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 267a085c5af7053..fb4f33a021a96bc 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -24,93 +24,61 @@ namespace { // by the MergeFunctions pass. 
class StructuralHashImpl { - stable_hash Hash = 4; + uint64_t Hash = 4; - bool DetailedHash; - - // This random value acts as a block header, as otherwise the partition of - // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. - static constexpr stable_hash BlockHeaderHash = 45798; - static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; - static constexpr stable_hash GlobalHeaderHash = 23456; + void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } // This will produce different values on 32-bit and 64-bit systens as // hash_combine returns a size_t. However, this is only used for // detailed hashing which, in-tree, only needs to distinguish between // differences in functions. - // TODO: This is not stable. - template stable_hash hashArbitaryType(const T &V) { - return hash_combine(V); + template void hashArbitaryType(const T &V) { + hash(hash_combine(V)); } - stable_hash hashType(Type *ValueType) { - SmallVector Hashes; - Hashes.emplace_back(ValueType->getTypeID()); + void hashType(Type *ValueType) { + hash(ValueType->getTypeID()); if (ValueType->isIntegerTy()) - Hashes.emplace_back(ValueType->getIntegerBitWidth()); - return stable_hash_combine(Hashes); + hash(ValueType->getIntegerBitWidth()); } public: - StructuralHashImpl() = delete; - explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} - - stable_hash hashConstant(Constant *C) { - SmallVector Hashes; - // TODO: hashArbitaryType() is not stable. - if (ConstantInt *ConstInt = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); - } else if (ConstantFP *ConstFP = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); - } else if (Function *Func = dyn_cast(C)) { + StructuralHashImpl() = default; + + void updateOperand(Value *Operand) { + hashType(Operand->getType()); + + // The cases enumerated below are not exhaustive and are only aimed to + // get decent coverage over the function. 
+ if (ConstantInt *ConstInt = dyn_cast(Operand)) { + hashArbitaryType(ConstInt->getValue()); + } else if (ConstantFP *ConstFP = dyn_cast(Operand)) { + hashArbitaryType(ConstFP->getValue()); + } else if (Argument *Arg = dyn_cast(Operand)) { + hash(Arg->getArgNo()); + } else if (Function *Func = dyn_cast(Operand)) { // Hashing the name will be deterministic as LLVM's hashing infrastructure // has explicit support for hashing strings and will not simply hash // the pointer. - Hashes.emplace_back(hashArbitaryType(Func->getName())); + hashArbitaryType(Func->getName()); } - - return stable_hash_combine(Hashes); - } - - stable_hash hashValue(Value *V) { - // Check constant and return its hash. - Constant *C = dyn_cast(V); - if (C) - return hashConstant(C); - - // Hash argument number. - SmallVector Hashes; - if (Argument *Arg = dyn_cast(V)) - Hashes.emplace_back(Arg->getArgNo()); - - return stable_hash_combine(Hashes); } - stable_hash hashOperand(Value *Operand) { - SmallVector Hashes; - Hashes.emplace_back(hashType(Operand->getType())); - Hashes.emplace_back(hashValue(Operand)); - return stable_hash_combine(Hashes); - } - - stable_hash hashInstruction(const Instruction &Inst) { - SmallVector Hashes; - Hashes.emplace_back(Inst.getOpcode()); + void updateInstruction(const Instruction &Inst, bool DetailedHash) { + hash(Inst.getOpcode()); if (!DetailedHash) - return stable_hash_combine(Hashes); + return; - Hashes.emplace_back(hashType(Inst.getType())); + hashType(Inst.getType()); // Handle additional properties of specific instructions that cause // semantic differences in the IR. 
if (const auto *ComparisonInstruction = dyn_cast(&Inst)) - Hashes.emplace_back(ComparisonInstruction->getPredicate()); + hash(ComparisonInstruction->getPredicate()); for (const auto &Op : Inst.operands()) - Hashes.emplace_back(hashOperand(Op)); - - return stable_hash_combine(Hashes); + updateOperand(Op); } // A function hash is calculated by considering only the number of arguments @@ -129,17 +97,15 @@ class StructuralHashImpl { // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. - void update(const Function &F) { + void update(const Function &F, bool DetailedHash) { // Declarations don't affect analyses. if (F.isDeclaration()) return; - SmallVector Hashes; - Hashes.emplace_back(Hash); - Hashes.emplace_back(FunctionHeaderHash); + hash(0x62642d6b6b2d6b72); // Function header - Hashes.emplace_back(F.isVarArg()); - Hashes.emplace_back(F.arg_size()); + hash(F.isVarArg()); + hash(F.arg_size()); SmallVector BBs; SmallPtrSet VisitedBBs; @@ -152,17 +118,17 @@ class StructuralHashImpl { while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); - Hashes.emplace_back(BlockHeaderHash); + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the + // opcodes + hash(45798); for (auto &Inst : *BB) - Hashes.emplace_back(hashInstruction(Inst)); + updateInstruction(Inst, DetailedHash); for (const BasicBlock *Succ : successors(BB)) if (VisitedBBs.insert(Succ).second) BBs.push_back(Succ); } - - // Update the combined hash in place. 
- Hash = stable_hash_combine(Hashes); } void update(const GlobalVariable &GV) { @@ -171,20 +137,15 @@ class StructuralHashImpl { // we ignore anything with the `.llvm` prefix if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) return; - SmallVector Hashes; - Hashes.emplace_back(Hash); - Hashes.emplace_back(GlobalHeaderHash); - Hashes.emplace_back(GV.getValueType()->getTypeID()); - - // Update the combined hash in place. - Hash = stable_hash_combine(Hashes); + hash(23456); // Global header + hash(GV.getValueType()->getTypeID()); } - void update(const Module &M) { + void update(const Module &M, bool DetailedHash) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F); + update(F, DetailedHash); } uint64_t getHash() const { return Hash; } @@ -192,14 +153,14 @@ class StructuralHashImpl { } // namespace -stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { - StructuralHashImpl H(DetailedHash); - H.update(F); +IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { + StructuralHashImpl H; + H.update(F, DetailedHash); return H.getHash(); } -stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { - StructuralHashImpl H(DetailedHash); - H.update(M); +IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { + StructuralHashImpl H; + H.update(M, DetailedHash); return H.getHash(); } diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index ad16b0b3501495e..b50a700e09038f1 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -172,14 +172,14 @@ namespace { class FunctionNode { mutable AssertingVH F; - stable_hash Hash; + IRHash Hash; public: // Note the hash is recalculated potentially multiple times, but it is cheap. 
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {} Function *getFunc() const { return F; } - stable_hash getHash() const { return Hash; } + IRHash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. @@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) { // All functions in the module, ordered by hash. Functions with a unique // hash value are easily eliminated. - std::vector> HashedFuncs; + std::vector> HashedFuncs; for (Function &Func : M) { if (isEligibleForMerging(Func)) { HashedFuncs.push_back({StructuralHash(Func), &Func}); diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll index 0ceb363a67b1fa5..e7718ca84d31657 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll @@ -63,14 +63,6 @@ lpad: resume { ptr, i32 } zeroinitializer } -define i8 @call_with_same_range() { -; CHECK-LABEL: @call_with_same_range -; CHECK: tail call i8 @call_with_range - bitcast i8 0 to i8 - %out = call i8 @dummy(), !range !0 - ret i8 %out -} - define i8 @invoke_with_same_range() personality ptr undef { ; CHECK-LABEL: @invoke_with_same_range() ; CHECK: tail call i8 @invoke_with_range() @@ -84,6 +76,15 @@ lpad: resume { ptr, i32 } zeroinitializer } +define i8 @call_with_same_range() { +; CHECK-LABEL: @call_with_same_range +; CHECK: tail call i8 @call_with_range + bitcast i8 0 to i8 + %out = call i8 @dummy(), !range !0 + ret i8 %out +} + + declare i8 @dummy(); declare i32 @__gxx_personality_v0(...) 
From a3181b11b5b758cfa852c0d27465f84ab3b079bb Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 26 Oct 2024 10:02:11 -0700 Subject: [PATCH 095/425] [ADT] Teach StringRef to derive from std::string_view (#113752) This patch makes minimum changes to have StringRef derive from std::string_view, with an eventual goal of removing StringRef completely. Subsequent patches will further clean up the remaining bits. I've chosen public inheritance over private one because our codebase relies on implicit conversions to std::string_view, which is currently provided by: constexpr operator std::string_view() const { return std::string_view(data(), size()); } This implicit conversion stops applying once we use std::string_view as a base class, public or private. If we chose a private base class, we would lose the implicit conversion. --- llvm/include/llvm/ADT/StringRef.h | 41 ++++++++----------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index d5f30b88c4c6a2a..49b6b8ff52abec5 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -48,7 +48,9 @@ namespace llvm { /// situations where the character data resides in some other buffer, whose /// lifetime extends past that of the StringRef. For this reason, it is not in /// general safe to store a StringRef. - class LLVM_GSL_POINTER StringRef { + class LLVM_GSL_POINTER StringRef : public std::string_view { + using Base = std::string_view; + public: static constexpr size_t npos = ~size_t(0); @@ -60,12 +62,6 @@ namespace llvm { using const_reverse_iterator = std::reverse_iterator; private: - /// The start of the string, in an external buffer. - const char *Data = nullptr; - - /// The length of the string. 
- size_t Length = 0; - // Workaround memcmp issue with null pointers (undefined behavior) // by providing a specialized version static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { @@ -86,27 +82,25 @@ namespace llvm { /// Construct a string ref from a cstring. /*implicit*/ constexpr StringRef(const char *Str) - : Data(Str), Length(Str ? + : Base(Str, Str ? // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 - __builtin_strlen(Str) + __builtin_strlen(Str) #else - std::char_traits::length(Str) + std::char_traits::length(Str) #endif - : 0) { + : 0) { } /// Construct a string ref from a pointer and length. /*implicit*/ constexpr StringRef(const char *data, size_t length) - : Data(data), Length(length) {} + : Base(data, length) {} /// Construct a string ref from an std::string. - /*implicit*/ StringRef(const std::string &Str) - : Data(Str.data()), Length(Str.length()) {} + /*implicit*/ StringRef(const std::string &Str) : Base(Str) {} /// Construct a string ref from an std::string_view. - /*implicit*/ constexpr StringRef(std::string_view Str) - : Data(Str.data()), Length(Str.size()) {} + /*implicit*/ constexpr StringRef(std::string_view Str) : Base(Str) {} /// @} /// @name Iterators @@ -138,16 +132,9 @@ namespace llvm { /// @name String Operations /// @{ - /// data - Get a pointer to the start of the string (which may not be null - /// terminated). - [[nodiscard]] constexpr const char *data() const { return Data; } - /// empty - Check if the string is empty. [[nodiscard]] constexpr bool empty() const { return size() == 0; } - /// size - Get the string size. - [[nodiscard]] constexpr size_t size() const { return Length; } - /// front - Get the first character in the string. 
[[nodiscard]] char front() const { assert(!empty()); @@ -248,14 +235,6 @@ namespace llvm { std::enable_if_t::value, StringRef> & operator=(T &&Str) = delete; - /// @} - /// @name Type Conversions - /// @{ - - constexpr operator std::string_view() const { - return std::string_view(data(), size()); - } - /// @} /// @name String Predicates /// @{ From ef886a25805b6aa0cd42886791af5ce26d575f10 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 26 Oct 2024 10:32:09 -0700 Subject: [PATCH 096/425] Revert "[ADT] Teach StringRef to derive from std::string_view" (#113767) Reverts llvm/llvm-project#113752 Many build bot failures --- llvm/include/llvm/ADT/StringRef.h | 41 +++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 49b6b8ff52abec5..d5f30b88c4c6a2a 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -48,9 +48,7 @@ namespace llvm { /// situations where the character data resides in some other buffer, whose /// lifetime extends past that of the StringRef. For this reason, it is not in /// general safe to store a StringRef. - class LLVM_GSL_POINTER StringRef : public std::string_view { - using Base = std::string_view; - + class LLVM_GSL_POINTER StringRef { public: static constexpr size_t npos = ~size_t(0); @@ -62,6 +60,12 @@ namespace llvm { using const_reverse_iterator = std::reverse_iterator; private: + /// The start of the string, in an external buffer. + const char *Data = nullptr; + + /// The length of the string. + size_t Length = 0; + // Workaround memcmp issue with null pointers (undefined behavior) // by providing a specialized version static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { @@ -82,25 +86,27 @@ namespace llvm { /// Construct a string ref from a cstring. /*implicit*/ constexpr StringRef(const char *Str) - : Base(Str, Str ? + : Data(Str), Length(Str ? 
// GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 - __builtin_strlen(Str) + __builtin_strlen(Str) #else - std::char_traits::length(Str) + std::char_traits::length(Str) #endif - : 0) { + : 0) { } /// Construct a string ref from a pointer and length. /*implicit*/ constexpr StringRef(const char *data, size_t length) - : Base(data, length) {} + : Data(data), Length(length) {} /// Construct a string ref from an std::string. - /*implicit*/ StringRef(const std::string &Str) : Base(Str) {} + /*implicit*/ StringRef(const std::string &Str) + : Data(Str.data()), Length(Str.length()) {} /// Construct a string ref from an std::string_view. - /*implicit*/ constexpr StringRef(std::string_view Str) : Base(Str) {} + /*implicit*/ constexpr StringRef(std::string_view Str) + : Data(Str.data()), Length(Str.size()) {} /// @} /// @name Iterators @@ -132,9 +138,16 @@ namespace llvm { /// @name String Operations /// @{ + /// data - Get a pointer to the start of the string (which may not be null + /// terminated). + [[nodiscard]] constexpr const char *data() const { return Data; } + /// empty - Check if the string is empty. [[nodiscard]] constexpr bool empty() const { return size() == 0; } + /// size - Get the string size. + [[nodiscard]] constexpr size_t size() const { return Length; } + /// front - Get the first character in the string. [[nodiscard]] char front() const { assert(!empty()); @@ -235,6 +248,14 @@ namespace llvm { std::enable_if_t::value, StringRef> & operator=(T &&Str) = delete; + /// @} + /// @name Type Conversions + /// @{ + + constexpr operator std::string_view() const { + return std::string_view(data(), size()); + } + /// @} /// @name String Predicates /// @{ From c8140d0d7fa943f5ae339cb5e6f4fd8644d5ae89 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 26 Oct 2024 10:47:51 -0700 Subject: [PATCH 097/425] [MCParser] De-capitalize ELFAsmParser comments. 
NFC --- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 9cdc2aafe3d2241..b58210b3c268e99 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -163,7 +163,7 @@ class ELFAsmParser : public MCAsmParserExtension { } // end anonymous namespace -/// ParseDirectiveSymbolAttribute +/// parseDirectiveSymbolAttribute /// ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ] bool ELFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { MCSymbolAttr Attr = StringSwitch(Directive) @@ -746,7 +746,7 @@ static MCSymbolAttr MCAttrForString(StringRef Type) { .Default(MCSA_Invalid); } -/// ParseDirectiveELFType +/// parseDirectiveELFType /// ::= .type identifier , STT_ /// ::= .type identifier , #attribute /// ::= .type identifier , @attribute @@ -803,7 +803,7 @@ bool ELFAsmParser::parseDirectiveType(StringRef, SMLoc) { return false; } -/// ParseDirectiveIdent +/// parseDirectiveIdent /// ::= .ident string bool ELFAsmParser::parseDirectiveIdent(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::String)) @@ -821,7 +821,7 @@ bool ELFAsmParser::parseDirectiveIdent(StringRef, SMLoc) { return false; } -/// ParseDirectiveSymver +/// parseDirectiveSymver /// ::= .symver foo, bar2@zed bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) { StringRef OriginalName, Name, Action; @@ -858,7 +858,7 @@ bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) { return false; } -/// ParseDirectiveVersion +/// parseDirectiveVersion /// ::= .version string bool ELFAsmParser::parseDirectiveVersion(StringRef, SMLoc) { if (getLexer().isNot(AsmToken::String)) @@ -882,7 +882,7 @@ bool ELFAsmParser::parseDirectiveVersion(StringRef, SMLoc) { return false; } -/// ParseDirectiveWeakref +/// parseDirectiveWeakref /// ::= .weakref foo, bar bool 
ELFAsmParser::parseDirectiveWeakref(StringRef, SMLoc) { // FIXME: Share code with the other alias building directives. From 0be1883c36fc19e4020fea12902481c3dd3436d2 Mon Sep 17 00:00:00 2001 From: Hui Date: Sat, 26 Oct 2024 18:58:53 +0100 Subject: [PATCH 098/425] [libc++] Implement P0429R9 `std::flat_map` (#98643) Around half of the tests are based on the tests from Arthur O'Dwyer's original implementation of std::flat_map, with modifications and removals. Partially implements #105190 --- libcxx/docs/ReleaseNotes/20.rst | 1 + libcxx/docs/Status/Cxx23Papers.csv | 2 +- libcxx/include/CMakeLists.txt | 3 + libcxx/include/__flat_map/flat_map.h | 1359 +++++++++++++++++ libcxx/include/__flat_map/sorted_unique.h | 31 + libcxx/include/__utility/exception_guard.h | 6 + libcxx/include/flat_map | 54 + libcxx/include/module.modulemap | 8 + libcxx/modules/std.compat.cppm.in | 3 - libcxx/modules/std.cppm.in | 4 +- libcxx/modules/std/flat_map.inc | 12 +- .../flat.map/assert.input_range.pass.cpp | 66 + .../flat.map/assert.sorted_unique.pass.cpp | 225 +++ .../flat.map/container_stability.pass.cpp | 68 + .../test/libcxx/transitive_includes/cxx03.csv | 44 + .../test/libcxx/transitive_includes/cxx11.csv | 44 + .../test/libcxx/transitive_includes/cxx14.csv | 45 + .../test/libcxx/transitive_includes/cxx17.csv | 44 + .../test/libcxx/transitive_includes/cxx20.csv | 44 + .../test/libcxx/transitive_includes/cxx23.csv | 25 + .../test/libcxx/transitive_includes/cxx26.csv | 25 + .../container.adaptors/NaiveStaticVector.h | 94 ++ .../flat.map.syn/sorted_unique.pass.cpp | 44 + .../flat.map/flat.map.access/at.pass.cpp | 92 ++ .../flat.map.access/at_transparent.pass.cpp | 111 ++ .../flat.map.access/index_key.pass.cpp | 77 + .../flat.map.access/index_rv_key.pass.cpp | 68 + .../index_transparent.pass.cpp | 107 ++ .../flat.map/flat.map.capacity/empty.pass.cpp | 47 + .../flat.map.capacity/empty.verify.cpp | 24 + .../flat.map.capacity/max_size.pass.cpp | 76 + .../flat.map/flat.map.capacity/size.pass.cpp | 65 +
.../flat.map/flat.map.cons/alloc.pass.cpp | 72 + .../assign_initializer_list.pass.cpp | 59 + .../flat.map/flat.map.cons/compare.pass.cpp | 93 ++ .../flat.map.cons/containers.pass.cpp | 184 +++ .../flat.map/flat.map.cons/copy.pass.cpp | 70 + .../flat.map.cons/copy_alloc.pass.cpp | 67 + .../copy_assign.addressof.compile.pass.cpp | 30 + .../flat.map.cons/copy_assign.pass.cpp | 92 ++ .../flat.map/flat.map.cons/deduct.pass.cpp | 342 +++++ .../flat.map/flat.map.cons/deduct.verify.cpp | 97 ++ .../flat.map.cons/deduct_pmr.pass.cpp | 106 ++ .../flat.map/flat.map.cons/default.pass.cpp | 72 + .../flat.map.cons/default_noexcept.pass.cpp | 57 + .../flat.map.cons/dtor_noexcept.pass.cpp | 53 + .../flat.map.cons/initializer_list.pass.cpp | 157 ++ .../flat.map/flat.map.cons/iter_iter.pass.cpp | 154 ++ .../iter_iter_stability.pass.cpp | 65 + .../flat.map/flat.map.cons/move.pass.cpp | 88 ++ .../flat.map.cons/move_alloc.pass.cpp | 82 + .../flat.map.cons/move_assign.pass.cpp | 74 + .../flat.map.cons/move_assign_clears.pass.cpp | 104 ++ .../move_assign_noexcept.pass.cpp | 110 ++ .../flat.map.cons/move_exceptions.pass.cpp | 71 + .../flat.map.cons/move_noexcept.pass.cpp | 102 ++ .../flat.map/flat.map.cons/pmr.pass.cpp | 361 +++++ .../flat.map/flat.map.cons/range.pass.cpp | 227 +++ .../flat.map.cons/sorted_container.pass.cpp | 165 ++ .../sorted_initializer_list.pass.cpp | 179 +++ .../flat.map.cons/sorted_iter_iter.pass.cpp | 171 +++ .../flat.map.erasure/erase_if.pass.cpp | 93 ++ .../erase_if_exceptions.pass.cpp | 155 ++ .../flat.map.iterators/iterator.pass.cpp | 96 ++ .../iterator_comparison.pass.cpp | 155 ++ ...rator_concept_conformance.compile.pass.cpp | 82 + ...range_concept_conformance.compile.pass.cpp | 55 + .../reverse_iterator.pass.cpp | 90 ++ .../flat.map.modifiers/clear.pass.cpp | 64 + .../flat.map.modifiers/emplace.pass.cpp | 103 ++ .../flat.map.modifiers/emplace_hint.pass.cpp | 102 ++ .../flat.map.modifiers/erase_iter.pass.cpp | 151 ++ .../erase_iter_iter.pass.cpp | 109 ++ 
.../flat.map.modifiers/erase_key.pass.cpp | 91 ++ .../erase_key_transparent.pass.cpp | 144 ++ .../flat.map.modifiers/extract.pass.cpp | 91 ++ .../flat.map.modifiers/insert_cv.pass.cpp | 83 + .../insert_initializer_list.pass.cpp | 68 + .../insert_iter_cv.pass.cpp | 79 + .../insert_iter_iter.pass.cpp | 89 ++ .../insert_iter_rv.pass.cpp | 88 ++ .../insert_or_assign.pass.cpp | 326 ++++ .../insert_or_assign_transparent.pass.cpp | 259 ++++ .../flat.map.modifiers/insert_range.pass.cpp | 109 ++ .../insert_range_stability.pass.cpp | 63 + .../flat.map.modifiers/insert_rv.pass.cpp | 124 ++ .../insert_sorted_initializer_list.pass.cpp | 66 + .../insert_sorted_iter_iter.pass.cpp | 86 ++ .../insert_transparent.pass.cpp | 167 ++ .../flat.map.modifiers/replace.pass.cpp | 80 + .../swap_exception.pass.cpp | 78 + .../flat.map.modifiers/swap_free.pass.cpp | 97 ++ .../flat.map.modifiers/swap_member.pass.cpp | 95 ++ .../flat.map.modifiers/try_emplace.pass.cpp | 246 +++ .../try_emplace_transparent.pass.cpp | 182 +++ .../flat.map/flat.map.observers/comp.pass.cpp | 96 ++ .../flat.map.observers/keys_values.pass.cpp | 57 + .../flat.map.operations/contains.pass.cpp | 70 + .../contains_transparent.pass.cpp | 71 + .../flat.map.operations/count.pass.cpp | 69 + .../count_transparent.pass.cpp | 72 + .../flat.map.operations/equal_range.pass.cpp | 78 + .../equal_range_transparent.pass.cpp | 100 ++ .../flat.map.operations/find.pass.cpp | 55 + .../find_transparent.pass.cpp | 88 ++ .../flat.map.operations/lower_bound.pass.cpp | 71 + .../lower_bound_transparent.pass.cpp | 95 ++ .../flat.map.operations/upper_bound.pass.cpp | 72 + .../upper_bound_transparent.pass.cpp | 94 ++ .../container.adaptors/flat.map/helpers.h | 394 +++++ .../flat.map/incomplete_type.pass.cpp | 32 + .../flat.map/op_compare.pass.cpp | 118 ++ .../flat.map/types.compile.pass.cpp | 133 ++ libcxx/test/support/MinSequenceContainer.h | 83 + libcxx/utils/libcxx/header_information.py | 2 +- 115 files changed, 12225 insertions(+), 13 
deletions(-) create mode 100644 libcxx/include/__flat_map/flat_map.h create mode 100644 libcxx/include/__flat_map/sorted_unique.h create mode 100644 libcxx/include/flat_map create mode 100644 libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.input_range.pass.cpp create mode 100644 libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.sorted_unique.pass.cpp create mode 100644 libcxx/test/libcxx/containers/containers.adaptors/flat.map/container_stability.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/NaiveStaticVector.h create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_unique.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_key.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_rv_key.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.verify.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/max_size.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/size.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/alloc.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/assign_initializer_list.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/compare.pass.cpp 
create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/containers.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_alloc.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.addressof.compile.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.verify.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct_pmr.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default_noexcept.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/dtor_noexcept.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/initializer_list.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_alloc.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_clears.pass.cpp create mode 100644 
libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_noexcept.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_exceptions.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_noexcept.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/pmr.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/range.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_container.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_initializer_list.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_iter_iter.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if_exceptions.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_comparison.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_concept_conformance.compile.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/range_concept_conformance.compile.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/clear.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace.pass.cpp create mode 100644 
libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace_hint.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter_iter.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/extract.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_cv.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_initializer_list.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_cv.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_iter.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_rv.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range_stability.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_rv.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_initializer_list.pass.cpp 
create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_iter_iter.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/replace.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_exception.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_free.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_member.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/comp.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/keys_values.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find.pass.cpp create mode 100644 
libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound_transparent.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/helpers.h create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/op_compare.pass.cpp create mode 100644 libcxx/test/std/containers/container.adaptors/flat.map/types.compile.pass.cpp create mode 100644 libcxx/test/support/MinSequenceContainer.h diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 39546493ae8d6fd..84080e7cbafe2c4 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -43,6 +43,7 @@ Implemented Papers - P2985R0: A type trait for detecting virtual base classes (`Github `__) - ``std::jthread`` and ```` are not guarded behind ``-fexperimental-library`` anymore - P2674R1: A trait for implicit lifetime types (`Github `__) +- P0429R9: A Standard ``flat_map`` is partially implemented and ``flat_map`` is provided (`Github `__) Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv index c64f1c4171fce1a..6f1626da73507ee 100644 --- a/libcxx/docs/Status/Cxx23Papers.csv +++ b/libcxx/docs/Status/Cxx23Papers.csv @@ -52,7 +52,7 @@ "`P2443R1 `__","``views::chunk_by``","2022-02 (Virtual)","|Complete|","18.0","" "","","","","","" "`P0009R18 `__","mdspan: A 
Non-Owning Multidimensional Array Reference","2022-07 (Virtual)","|Complete|","18.0","" -"`P0429R9 `__","A Standard ``flat_map``","2022-07 (Virtual)","","","" +"`P0429R9 `__","A Standard ``flat_map``","2022-07 (Virtual)","|In progress|","","" "`P1169R4 `__","``static operator()``","2022-07 (Virtual)","|Complete|","16.0","" "`P1222R4 `__","A Standard ``flat_set``","2022-07 (Virtual)","","","" "`P1223R5 `__","``ranges::find_last()``, ``ranges::find_last_if()``, and ``ranges::find_last_if_not()``","2022-07 (Virtual)","|Complete|","19.0","" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index bb152af82cad5c3..e84a55e25f2fa42 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -358,6 +358,8 @@ set(files __filesystem/recursive_directory_iterator.h __filesystem/space_info.h __filesystem/u8path.h + __flat_map/flat_map.h + __flat_map/sorted_unique.h __format/buffer.h __format/concepts.h __format/container_adaptor.h @@ -959,6 +961,7 @@ set(files ext/hash_set fenv.h filesystem + flat_map float.h format forward_list diff --git a/libcxx/include/__flat_map/flat_map.h b/libcxx/include/__flat_map/flat_map.h new file mode 100644 index 000000000000000..9ca32d5295bd27d --- /dev/null +++ b/libcxx/include/__flat_map/flat_map.h @@ -0,0 +1,1359 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_MAP_FLAT_MAP_H +#define _LIBCPP___FLAT_MAP_FLAT_MAP_H + +#include <__algorithm/lexicographical_compare_three_way.h> +#include <__algorithm/ranges_adjacent_find.h> +#include <__algorithm/ranges_equal.h> +#include <__algorithm/ranges_inplace_merge.h> +#include <__algorithm/ranges_lower_bound.h> +#include <__algorithm/ranges_partition_point.h> +#include <__algorithm/ranges_stable_sort.h> +#include <__algorithm/ranges_unique.h> +#include <__algorithm/ranges_upper_bound.h> +#include <__compare/synth_three_way.h> +#include <__concepts/convertible_to.h> +#include <__concepts/swappable.h> +#include <__config> +#include <__flat_map/sorted_unique.h> +#include <__functional/invoke.h> +#include <__functional/is_transparent.h> +#include <__functional/operations.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/ranges_iterator_traits.h> +#include <__iterator/reverse_iterator.h> +#include <__memory/allocator_traits.h> +#include <__memory/uses_allocator.h> +#include <__memory/uses_allocator_construction.h> +#include <__ranges/concepts.h> +#include <__ranges/container_compatible_range.h> +#include <__ranges/drop_view.h> +#include <__ranges/ref_view.h> +#include <__ranges/subrange.h> +#include <__ranges/zip_view.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/container_traits.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_allocator.h> +#include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_same.h> +#include <__type_traits/maybe_const.h> +#include <__utility/exception_guard.h> +#include <__utility/pair.h> +#include +#include +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + 
+_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +template , + class _KeyContainer = vector<_Key>, + class _MappedContainer = vector<_Tp>> +class flat_map { + template + struct __iterator; + + template + friend class flat_map; + + static_assert(is_same_v<_Key, typename _KeyContainer::value_type>); + static_assert(is_same_v<_Tp, typename _MappedContainer::value_type>); + static_assert(!is_same_v<_KeyContainer, std::vector>, "vector is not a sequence container"); + static_assert(!is_same_v<_MappedContainer, std::vector>, "vector is not a sequence container"); + +public: + // types + using key_type = _Key; + using mapped_type = _Tp; + using value_type = pair; + using key_compare = __type_identity_t<_Compare>; + using reference = pair; + using const_reference = pair; + using size_type = size_t; + using difference_type = ptrdiff_t; + using iterator = __iterator; // see [container.requirements] + using const_iterator = __iterator; // see [container.requirements] + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using key_container_type = _KeyContainer; + using mapped_container_type = _MappedContainer; + + class value_compare { + private: + key_compare __comp_; + value_compare(key_compare __c) : __comp_(__c) {} + friend flat_map; + + public: + _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + return __comp_(__x.first, __y.first); + } + }; + + struct containers { + key_container_type keys; + mapped_container_type values; + }; + +private: + template + _LIBCPP_HIDE_FROM_ABI static constexpr bool __allocator_ctor_constraint = + _And, uses_allocator>::value; + + _LIBCPP_HIDE_FROM_ABI static constexpr bool __is_compare_transparent = __is_transparent_v<_Compare, _Compare>; + + template + struct __iterator { + private: + using __key_iterator = ranges::iterator_t; + using __mapped_iterator = 
ranges::iterator_t<__maybe_const<_Const, mapped_container_type>>; + using __reference = pair, iter_reference_t<__mapped_iterator>>; + + struct __arrow_proxy { + __reference __ref_; + _LIBCPP_HIDE_FROM_ABI __reference* operator->() { return std::addressof(__ref_); } + }; + + __key_iterator __key_iter_; + __mapped_iterator __mapped_iter_; + + friend flat_map; + + public: + using iterator_concept = random_access_iterator_tag; + // `flat_map::iterator` only satisfy "Cpp17InputIterator" named requirements, because + // its `reference` is not a reference type. + // However, to avoid surprising runtime behaviour when it is used with the + // Cpp17 algorithms or operations, iterator_category is set to random_access_iterator_tag. + using iterator_category = random_access_iterator_tag; + using value_type = flat_map::value_type; + using difference_type = flat_map::difference_type; + + _LIBCPP_HIDE_FROM_ABI __iterator() = default; + + _LIBCPP_HIDE_FROM_ABI __iterator(__iterator __i) + requires _Const && convertible_to, __key_iterator> && + convertible_to, __mapped_iterator> + : __key_iter_(std::move(__i.__key_iter_)), __mapped_iter_(std::move(__i.__mapped_iter_)) {} + + _LIBCPP_HIDE_FROM_ABI __iterator(__key_iterator __key_iter, __mapped_iterator __mapped_iter) + : __key_iter_(std::move(__key_iter)), __mapped_iter_(std::move(__mapped_iter)) {} + + _LIBCPP_HIDE_FROM_ABI __reference operator*() const { return __reference(*__key_iter_, *__mapped_iter_); } + _LIBCPP_HIDE_FROM_ABI __arrow_proxy operator->() const { return __arrow_proxy{**this}; } + + _LIBCPP_HIDE_FROM_ABI __iterator& operator++() { + ++__key_iter_; + ++__mapped_iter_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __iterator operator++(int) { + __iterator __tmp(*this); + ++*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI __iterator& operator--() { + --__key_iter_; + --__mapped_iter_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __iterator operator--(int) { + __iterator __tmp(*this); + --*this; + return __tmp; + 
} + + _LIBCPP_HIDE_FROM_ABI __iterator& operator+=(difference_type __x) { + __key_iter_ += __x; + __mapped_iter_ += __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __iterator& operator-=(difference_type __x) { + __key_iter_ -= __x; + __mapped_iter_ -= __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __reference operator[](difference_type __n) const { return *(*this + __n); } + + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) { + return __x.__key_iter_ == __y.__key_iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend bool operator<(const __iterator& __x, const __iterator& __y) { + return __x.__key_iter_ < __y.__key_iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend bool operator>(const __iterator& __x, const __iterator& __y) { return __y < __x; } + + _LIBCPP_HIDE_FROM_ABI friend bool operator<=(const __iterator& __x, const __iterator& __y) { return !(__y < __x); } + + _LIBCPP_HIDE_FROM_ABI friend bool operator>=(const __iterator& __x, const __iterator& __y) { return !(__x < __y); } + + _LIBCPP_HIDE_FROM_ABI friend auto operator<=>(const __iterator& __x, const __iterator& __y) + requires three_way_comparable<__key_iterator> + { + return __x.__key_iter_ <=> __y.__key_iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend __iterator operator+(const __iterator& __i, difference_type __n) { + auto __tmp = __i; + __tmp += __n; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI friend __iterator operator+(difference_type __n, const __iterator& __i) { return __i + __n; } + + _LIBCPP_HIDE_FROM_ABI friend __iterator operator-(const __iterator& __i, difference_type __n) { + auto __tmp = __i; + __tmp -= __n; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI friend difference_type operator-(const __iterator& __x, const __iterator& __y) { + return difference_type(__x.__key_iter_ - __y.__key_iter_); + } + }; + +public: + // [flat.map.cons], construct/copy/destroy + _LIBCPP_HIDE_FROM_ABI flat_map() noexcept( + is_nothrow_default_constructible_v<_KeyContainer> 
&& is_nothrow_default_constructible_v<_MappedContainer> && + is_nothrow_default_constructible_v<_Compare>) + : __containers_(), __compare_() {} + + _LIBCPP_HIDE_FROM_ABI flat_map(const flat_map&) = default; + + _LIBCPP_HIDE_FROM_ABI flat_map(flat_map&& __other) noexcept( + is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_MappedContainer> && + is_nothrow_move_constructible_v<_Compare>) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : __containers_(std::move(__other.__containers_)), __compare_(std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) { + __other.clear(); + // gcc does not like the `throw` keyword in a conditional noexcept function + if constexpr (!(is_nothrow_move_constructible_v<_KeyContainer> && + is_nothrow_move_constructible_v<_MappedContainer> && is_nothrow_move_constructible_v<_Compare>)) { + throw; + } +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(const flat_map& __other, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, + __alloc, + __other.__containers_.keys, + __other.__containers_.values, + __other.__compare_) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(flat_map&& __other, const _Allocator& __alloc) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : flat_map(__ctor_uses_allocator_tag{}, + __alloc, + std::move(__other.__containers_.keys), + std::move(__other.__containers_.values), + std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) 
{ + __other.clear(); + throw; +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + _LIBCPP_HIDE_FROM_ABI flat_map( + key_container_type __key_cont, mapped_container_type __mapped_cont, const key_compare& __comp = key_compare()) + : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + __sort_and_unique(); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + __sort_and_unique(); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + __sort_and_unique(); + } + + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, + key_container_type __key_cont, + mapped_container_type __mapped_cont, + const key_compare& __comp = key_compare()) + : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + 
__is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates"); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates"); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates"); + } + + _LIBCPP_HIDE_FROM_ABI explicit flat_map(const key_compare& __comp) : __containers_(), __compare_(__comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(const key_compare& __comp, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI explicit flat_map(const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) {} + + template + requires 
__has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI + flat_map(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __containers_(), __compare_(__comp) { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_map(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI flat_map(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert(__first, __last); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t __fr, _Range&& __rg) + : flat_map(__fr, std::forward<_Range>(__rg), key_compare()) {} + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert_range(std::forward<_Range>(__rg)); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_map(__comp) { + insert_range(std::forward<_Range>(__rg)); + } + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert_range(std::forward<_Range>(__rg)); + } + + template + requires 
__has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __containers_(), __compare_(__comp) { + insert(sorted_unique, __first, __last); + } + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, + _InputIterator __first, + _InputIterator __last, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert(sorted_unique, __first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert(sorted_unique, __first, __last); + } + + _LIBCPP_HIDE_FROM_ABI flat_map(initializer_list __il, const key_compare& __comp = key_compare()) + : flat_map(__il.begin(), __il.end(), __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : flat_map(__il.begin(), __il.end(), __comp, __alloc) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(initializer_list __il, const _Allocator& __alloc) + : flat_map(__il.begin(), __il.end(), __alloc) {} + + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, initializer_list __il, const key_compare& __comp = key_compare()) + : flat_map(sorted_unique, __il.begin(), __il.end(), __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : 
flat_map(sorted_unique, __il.begin(), __il.end(), __comp, __alloc) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(sorted_unique_t, initializer_list __il, const _Allocator& __alloc) + : flat_map(sorted_unique, __il.begin(), __il.end(), __alloc) {} + + _LIBCPP_HIDE_FROM_ABI flat_map& operator=(initializer_list __il) { + clear(); + insert(__il); + return *this; + } + + _LIBCPP_HIDE_FROM_ABI flat_map& operator=(const flat_map&) = default; + + _LIBCPP_HIDE_FROM_ABI flat_map& operator=(flat_map&& __other) noexcept( + is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_MappedContainer> && + is_nothrow_move_assignable_v<_Compare>) { + // No matter what happens, we always want to clear the other container before returning + // since we moved from it + auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; }); + { + // If an exception is thrown, we have no choice but to clear *this to preserve invariants + auto __on_exception = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __containers_ = std::move(__other.__containers_); + __compare_ = std::move(__other.__compare_); + __on_exception.__complete(); + } + return *this; + } + + // iterators + _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept { + return iterator(__containers_.keys.begin(), __containers_.values.begin()); + } + + _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { + return const_iterator(__containers_.keys.begin(), __containers_.values.begin()); + } + + _LIBCPP_HIDE_FROM_ABI iterator end() noexcept { + return iterator(__containers_.keys.end(), __containers_.values.end()); + } + + _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { + return const_iterator(__containers_.keys.end(), __containers_.values.end()); + } + + _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI 
const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + + _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); } + _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + + // [flat.map.capacity], capacity + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __containers_.keys.empty(); } + + _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __containers_.keys.size(); } + + _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept { + return std::min(__containers_.keys.max_size(), __containers_.values.max_size()); + } + + // [flat.map.access], element access + _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __x) + requires is_constructible_v + { + return try_emplace(__x).first->second; + } + + _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __x) + requires is_constructible_v + { + return try_emplace(std::move(__x)).first->second; + } + + template + requires(__is_compare_transparent && is_constructible_v && is_constructible_v && + !is_convertible_v<_Kp &&, const_iterator> && !is_convertible_v<_Kp &&, iterator>) + _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](_Kp&& __x) { + return try_emplace(std::forward<_Kp>(__x)).first->second; + } + + _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __x) { + auto __it = find(__x); + if (__it == end()) { + std::__throw_out_of_range("flat_map::at(const key_type&): Key does not exist"); + } + return __it->second; + } + + _LIBCPP_HIDE_FROM_ABI const mapped_type& 
at(const key_type& __x) const { + auto __it = find(__x); + if (__it == end()) { + std::__throw_out_of_range("flat_map::at(const key_type&) const: Key does not exist"); + } + return __it->second; + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI mapped_type& at(const _Kp& __x) { + auto __it = find(__x); + if (__it == end()) { + std::__throw_out_of_range("flat_map::at(const K&): Key does not exist"); + } + return __it->second; + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const _Kp& __x) const { + auto __it = find(__x); + if (__it == end()) { + std::__throw_out_of_range("flat_map::at(const K&) const: Key does not exist"); + } + return __it->second; + } + + // [flat.map.modifiers], modifiers + template + requires is_constructible_v, _Args...> + _LIBCPP_HIDE_FROM_ABI pair emplace(_Args&&... __args) { + std::pair __pair(std::forward<_Args>(__args)...); + return __try_emplace(std::move(__pair.first), std::move(__pair.second)); + } + + template + requires is_constructible_v, _Args...> + _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... 
__args) { + std::pair __pair(std::forward<_Args>(__args)...); + return __try_emplace_hint(__hint, std::move(__pair.first), std::move(__pair.second)).first; + } + + _LIBCPP_HIDE_FROM_ABI pair insert(const value_type& __x) { return emplace(__x); } + + _LIBCPP_HIDE_FROM_ABI pair insert(value_type&& __x) { return emplace(std::move(__x)); } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) { + return emplace_hint(__hint, __x); + } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) { + return emplace_hint(__hint, std::move(__x)); + } + + template + requires is_constructible_v, _Pp> + _LIBCPP_HIDE_FROM_ABI pair insert(_Pp&& __x) { + return emplace(std::forward<_Pp>(__x)); + } + + template + requires is_constructible_v, _Pp> + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, _Pp&& __x) { + return emplace_hint(__hint, std::forward<_Pp>(__x)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + __append_sort_merge_unique(std::move(__first), std::move(__last)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + void insert(sorted_unique_t, _InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + + __append_sort_merge_unique(std::move(__first), std::move(__last)); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } + + __append_sort_merge_unique(ranges::begin(__range), ranges::end(__range)); + } + + _LIBCPP_HIDE_FROM_ABI void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } + + 
_LIBCPP_HIDE_FROM_ABI void insert(sorted_unique_t, initializer_list __il) { + insert(sorted_unique, __il.begin(), __il.end()); + } + + _LIBCPP_HIDE_FROM_ABI containers extract() && { + auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); + auto __ret = std::move(__containers_); + return __ret; + } + + _LIBCPP_HIDE_FROM_ABI void replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE( + __key_cont.size() == __mapped_cont.size(), "flat_map keys and mapped containers have different size"); + + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__key_cont), "Either the key container is not sorted or it contains duplicates"); + auto __guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __containers_.keys = std::move(__key_cont); + __containers_.values = std::move(__mapped_cont); + __guard.__complete(); + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair try_emplace(const key_type& __key, _Args&&... __args) { + return __try_emplace(__key, std::forward<_Args>(__args)...); + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair try_emplace(key_type&& __key, _Args&&... __args) { + return __try_emplace(std::move(__key), std::forward<_Args>(__args)...); + } + + template + requires(__is_compare_transparent && is_constructible_v && + is_constructible_v && !is_convertible_v<_Kp &&, const_iterator> && + !is_convertible_v<_Kp &&, iterator>) + _LIBCPP_HIDE_FROM_ABI pair try_emplace(_Kp&& __key, _Args&&... __args) { + return __try_emplace(std::forward<_Kp>(__key), std::forward<_Args>(__args)...); + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, const key_type& __key, _Args&&... 
__args) { + return __try_emplace_hint(__hint, __key, std::forward<_Args>(__args)...).first; + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, key_type&& __key, _Args&&... __args) { + return __try_emplace_hint(__hint, std::move(__key), std::forward<_Args>(__args)...).first; + } + + template + requires __is_compare_transparent && is_constructible_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, _Kp&& __key, _Args&&... __args) { + return __try_emplace_hint(__hint, std::forward<_Kp>(__key), std::forward<_Args>(__args)...).first; + } + + template + requires is_assignable_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(const key_type& __key, _Mapped&& __obj) { + return __insert_or_assign(__key, std::forward<_Mapped>(__obj)); + } + + template + requires is_assignable_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(key_type&& __key, _Mapped&& __obj) { + return __insert_or_assign(std::move(__key), std::forward<_Mapped>(__obj)); + } + + template + requires __is_compare_transparent && is_constructible_v && is_assignable_v && + is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(_Kp&& __key, _Mapped&& __obj) { + return __insert_or_assign(std::forward<_Kp>(__key), std::forward<_Mapped>(__obj)); + } + + template + requires is_assignable_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, const key_type& __key, _Mapped&& __obj) { + return __insert_or_assign(__hint, __key, std::forward<_Mapped>(__obj)); + } + + template + requires is_assignable_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, key_type&& __key, _Mapped&& __obj) { + return __insert_or_assign(__hint, std::move(__key), std::forward<_Mapped>(__obj)); + } + + template + requires __is_compare_transparent && is_constructible_v && is_assignable_v && + is_constructible_v 
+ _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __obj) { + return __insert_or_assign(__hint, std::forward<_Kp>(__key), std::forward<_Mapped>(__obj)); + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) { + return __erase(__position.__key_iter_, __position.__mapped_iter_); + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) { + return __erase(__position.__key_iter_, __position.__mapped_iter_); + } + + _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) { + auto __iter = find(__x); + if (__iter != end()) { + erase(__iter); + return 1; + } + return 0; + } + + template + requires(__is_compare_transparent && !is_convertible_v<_Kp &&, iterator> && + !is_convertible_v<_Kp &&, const_iterator>) + _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) { + auto [__first, __last] = equal_range(__x); + auto __res = __last - __first; + erase(__first, __last); + return __res; + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_it = __containers_.keys.erase(__first.__key_iter_, __last.__key_iter_); + auto __mapped_it = __containers_.values.erase(__first.__mapped_iter_, __last.__mapped_iter_); + __on_failure.__complete(); + return iterator(std::move(__key_it), std::move(__mapped_it)); + } + + _LIBCPP_HIDE_FROM_ABI void swap(flat_map& __y) noexcept { + // warning: The spec has unconditional noexcept, which means that + // if any of the following functions throw an exception, + // std::terminate will be called. + // This is discussed in P2767, which hasn't been voted on yet. 
+ ranges::swap(__compare_, __y.__compare_); + ranges::swap(__containers_.keys, __y.__containers_.keys); + ranges::swap(__containers_.values, __y.__containers_.values); + } + + _LIBCPP_HIDE_FROM_ABI void clear() noexcept { + __containers_.keys.clear(); + __containers_.values.clear(); + } + + // observers + _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; } + _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__compare_); } + + _LIBCPP_HIDE_FROM_ABI const key_container_type& keys() const noexcept { return __containers_.keys; } + _LIBCPP_HIDE_FROM_ABI const mapped_container_type& values() const noexcept { return __containers_.values; } + + // map operations + _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) { + return __find_impl(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const { + return __find_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const { return contains(__x) ? 1 : 0; } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const { + return contains(__x) ? 
1 : 0; + } + + _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const { + return find(__x) != end(); + } + + _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { return __lower_bound(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const { + return __lower_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) { + return __lower_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const { + return __lower_bound(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { return __upper_bound(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const { + return __upper_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) { + return __upper_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const { + return __upper_bound(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) { + return __equal_range_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) const { + return __equal_range_impl(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) { + return __equal_range_impl(*this, __x); + } + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) const { + return __equal_range_impl(*this, __x); + } + + friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_map& __x, const flat_map& __y) { + return ranges::equal(__x, 
__y); + } + + friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_map& __x, const flat_map& __y) { + return std::lexicographical_compare_three_way( + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + } + + friend _LIBCPP_HIDE_FROM_ABI void swap(flat_map& __x, flat_map& __y) noexcept { __x.swap(__y); } + +private: + struct __ctor_uses_allocator_tag { + explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_tag() = default; + }; + struct __ctor_uses_allocator_empty_tag { + explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_empty_tag() = default; + }; + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(__ctor_uses_allocator_tag, + const _Allocator& __alloc, + _KeyCont&& __key_cont, + _MappedCont&& __mapped_cont, + _CompArg&&... __comp) + : __containers_{.keys = std::make_obj_using_allocator( + __alloc, std::forward<_KeyCont>(__key_cont)), + .values = std::make_obj_using_allocator( + __alloc, std::forward<_MappedCont>(__mapped_cont))}, + __compare_(std::forward<_CompArg>(__comp)...) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp) + : __containers_{.keys = std::make_obj_using_allocator(__alloc), + .values = std::make_obj_using_allocator(__alloc)}, + __compare_(std::forward<_CompArg>(__comp)...) {} + + _LIBCPP_HIDE_FROM_ABI bool __is_sorted_and_unique(auto&& __key_container) const { + auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); }; + return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container); + } + + // This function is only used in constructors. So there is not exception handling in this function. 
+ // If the function exits via an exception, there will be no flat_map object constructed, thus, there + // is no invariant state to preserve + _LIBCPP_HIDE_FROM_ABI void __sort_and_unique() { + auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); + // To be consistent with std::map's behaviour, we use stable_sort instead of sort. + // As a result, if there are duplicated keys, the first value in the original order will be taken. + ranges::stable_sort(__zv, __compare_, [](const auto& __p) -> decltype(auto) { return std::get<0>(__p); }); + auto __dup_start = ranges::unique(__zv, __key_equiv(__compare_)).begin(); + auto __dist = ranges::distance(__zv.begin(), __dup_start); + __containers_.keys.erase(__containers_.keys.begin() + __dist, __containers_.keys.end()); + __containers_.values.erase(__containers_.values.begin() + __dist, __containers_.values.end()); + } + + template + _LIBCPP_HIDE_FROM_ABI size_type __append(_InputIterator __first, _Sentinel __last) { + size_type __num_of_appended = 0; + for (; __first != __last; ++__first) { + value_type __kv = *__first; + __containers_.keys.insert(__containers_.keys.end(), std::move(__kv.first)); + __containers_.values.insert(__containers_.values.end(), std::move(__kv.second)); + ++__num_of_appended; + } + return __num_of_appended; + } + + template + _LIBCPP_HIDE_FROM_ABI void __append_sort_merge_unique(_InputIterator __first, _Sentinel __last) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + size_t __num_of_appended = __append(std::move(__first), std::move(__last)); + if (__num_of_appended != 0) { + auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); + auto __append_start_offset = __containers_.keys.size() - __num_of_appended; + auto __end = __zv.end(); + auto __compare_key = [this](const auto& __p1, const auto& __p2) { + return __compare_(std::get<0>(__p1), std::get<0>(__p2)); + }; + if constexpr (!_WasSorted) { + 
ranges::stable_sort(__zv.begin() + __append_start_offset, __end, __compare_key); + } else { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__containers_.keys | ranges::views::drop(__append_start_offset)), + "Either the key container is not sorted or it contains duplicates"); + } + ranges::inplace_merge(__zv.begin(), __zv.begin() + __append_start_offset, __end, __compare_key); + + auto __dup_start = ranges::unique(__zv, __key_equiv(__compare_)).begin(); + auto __dist = ranges::distance(__zv.begin(), __dup_start); + __containers_.keys.erase(__containers_.keys.begin() + __dist, __containers_.keys.end()); + __containers_.values.erase(__containers_.values.begin() + __dist, __containers_.values.end()); + } + __on_failure.__complete(); + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) { + auto __it = __self.lower_bound(__key); + auto __last = __self.end(); + if (__it == __last || __self.__compare_(__key, __it->first)) { + return __last; + } + return __it; + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __key_equal_range(_Self&& __self, const _Kp& __key) { + auto __it = ranges::lower_bound(__self.__containers_.keys, __key, __self.__compare_); + auto __last = __self.__containers_.keys.end(); + if (__it == __last || __self.__compare_(__key, *__it)) { + return std::make_pair(__it, __it); + } + return std::make_pair(__it, std::next(__it)); + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { + auto [__key_first, __key_last] = __key_equal_range(__self, __key); + + const auto __make_mapped_iter = [&](const auto& __key_iter) { + return __self.__containers_.values.begin() + + static_cast>( + ranges::distance(__self.__containers_.keys.begin(), __key_iter)); + }; + + using __iterator_type = ranges::iterator_t; + return std::make_pair(__iterator_type(__key_first, __make_mapped_iter(__key_first)), + __iterator_type(__key_last, __make_mapped_iter(__key_last))); 
+ } + + template + _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) { + return __binary_search<_Res>(__self, ranges::lower_bound, __x); + } + + template + _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) { + return __binary_search<_Res>(__self, ranges::upper_bound, __x); + } + + template + _LIBCPP_HIDE_FROM_ABI static _Res __binary_search(_Self&& __self, _Fn __search_fn, _Kp& __x) { + auto __key_iter = __search_fn(__self.__containers_.keys, __x, __self.__compare_); + auto __mapped_iter = + __self.__containers_.values.begin() + + static_cast>( + ranges::distance(__self.__containers_.keys.begin(), __key_iter)); + + return _Res(std::move(__key_iter), std::move(__mapped_iter)); + } + + template + _LIBCPP_HIDE_FROM_ABI pair __try_emplace(_KeyArg&& __key, _MArgs&&... __mapped_args) { + auto __key_it = ranges::lower_bound(__containers_.keys, __key, __compare_); + auto __mapped_it = __containers_.values.begin() + ranges::distance(__containers_.keys.begin(), __key_it); + + if (__key_it == __containers_.keys.end() || __compare_(__key, *__key_it)) { + return pair( + __try_emplace_exact_hint( + std::move(__key_it), + std::move(__mapped_it), + std::forward<_KeyArg>(__key), + std::forward<_MArgs>(__mapped_args)...), + true); + } else { + return pair(iterator(std::move(__key_it), std::move(__mapped_it)), false); + } + } + + template + _LIBCPP_HIDE_FROM_ABI bool __is_hint_correct(const_iterator __hint, _Kp&& __key) { + if (__hint != cbegin() && !__compare_((__hint - 1)->first, __key)) { + return false; + } + if (__hint != cend() && __compare_(__hint->first, __key)) { + return false; + } + return true; + } + + template + _LIBCPP_HIDE_FROM_ABI pair __try_emplace_hint(const_iterator __hint, _Kp&& __key, _Args&&... 
__args) { + if (__is_hint_correct(__hint, __key)) { + if (__hint == cend() || __compare_(__key, __hint->first)) { + return { + __try_emplace_exact_hint( + __hint.__key_iter_, __hint.__mapped_iter_, std::forward<_Kp>(__key), std::forward<_Args>(__args)...), + true}; + } else { + // key equals + auto __dist = __hint - cbegin(); + return {iterator(__containers_.keys.begin() + __dist, __containers_.values.begin() + __dist), false}; + } + } else { + return __try_emplace(std::forward<_Kp>(__key), std::forward<_Args>(__args)...); + } + } + + template + _LIBCPP_HIDE_FROM_ABI iterator + __try_emplace_exact_hint(_IterK&& __it_key, _IterM&& __it_mapped, _KeyArg&& __key, _MArgs&&... __mapped_args) { + auto __on_key_failed = std::__make_exception_guard([&]() noexcept { + if constexpr (__container_traits<_KeyContainer>::__emplacement_has_strong_exception_safety_guarantee) { + // Nothing to roll back! + } else { + // we need to clear both because we don't know the state of our keys anymore + clear() /* noexcept */; + } + }); + auto __key_it = __containers_.keys.emplace(__it_key, std::forward<_KeyArg>(__key)); + __on_key_failed.__complete(); + + auto __on_value_failed = std::__make_exception_guard([&]() noexcept { + if constexpr (!__container_traits<_MappedContainer>::__emplacement_has_strong_exception_safety_guarantee) { + // we need to clear both because we don't know the state of our values anymore + clear() /* noexcept */; + } else { + // In this case, we know the values are just like before we attempted emplacement, + // and we also know that the keys have been emplaced successfully. Just roll back the keys. +# if _LIBCPP_HAS_EXCEPTIONS + try { +# endif // _LIBCPP_HAS_EXCEPTIONS + __containers_.keys.erase(__key_it); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) { + // Now things are funky for real. We're failing to rollback the keys. + // Just give up and clear the whole thing. 
+ // + // Also, swallow the exception that happened during the rollback and let the + // original value-emplacement exception propagate normally. + clear() /* noexcept */; + } +# endif // _LIBCPP_HAS_EXCEPTIONS + } + }); + auto __mapped_it = __containers_.values.emplace(__it_mapped, std::forward<_MArgs>(__mapped_args)...); + __on_value_failed.__complete(); + + return iterator(std::move(__key_it), std::move(__mapped_it)); + } + + template + _LIBCPP_HIDE_FROM_ABI pair __insert_or_assign(_Kp&& __key, _Mapped&& __mapped) { + auto __r = try_emplace(std::forward<_Kp>(__key), std::forward<_Mapped>(__mapped)); + if (!__r.second) { + __r.first->second = std::forward<_Mapped>(__mapped); + } + return __r; + } + + template + _LIBCPP_HIDE_FROM_ABI iterator __insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __mapped) { + auto __r = __try_emplace_hint(__hint, std::forward<_Kp>(__key), std::forward<_Mapped>(__mapped)); + if (!__r.second) { + __r.first->second = std::forward<_Mapped>(__mapped); + } + return __r.first; + } + + _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) { + if constexpr (requires { __containers_.keys.reserve(__size); }) { + __containers_.keys.reserve(__size); + } + + if constexpr (requires { __containers_.values.reserve(__size); }) { + __containers_.values.reserve(__size); + } + } + + template + _LIBCPP_HIDE_FROM_ABI iterator __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_iter = __containers_.keys.erase(__key_iter_to_remove); + auto __mapped_iter = __containers_.values.erase(__mapped_iter_to_remove); + __on_failure.__complete(); + return iterator(std::move(__key_iter), std::move(__mapped_iter)); + } + + template + friend typename flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>::size_type + erase_if(flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate); + + containers 
__containers_; + [[no_unique_address]] key_compare __compare_; + + struct __key_equiv { + _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x)); + } + key_compare __comp_; + }; +}; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && + is_invocable_v) +flat_map(_KeyContainer, _MappedContainer, _Compare = _Compare()) + -> flat_map; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && + !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) +flat_map(_KeyContainer, _MappedContainer, _Allocator) + -> flat_map, + _KeyContainer, + _MappedContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && + uses_allocator_v<_MappedContainer, _Allocator> && + is_invocable_v) +flat_map(_KeyContainer, _MappedContainer, _Compare, _Allocator) + -> flat_map; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && + is_invocable_v) +flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare = _Compare()) + -> flat_map; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && + !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) +flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Allocator) + -> flat_map, + _KeyContainer, + _MappedContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + 
!__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && + uses_allocator_v<_MappedContainer, _Allocator> && + is_invocable_v) +flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare, _Allocator) + -> flat_map; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_map(_InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_map(sorted_unique_t, _InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; + +template >, + class _Allocator = allocator, + class = __enable_if_t::value && __is_allocator<_Allocator>::value>> +flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) + -> flat_map< + __range_key_type<_Range>, + __range_mapped_type<_Range>, + _Compare, + vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>, + vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; + +template ::value>> +flat_map(from_range_t, _Range&&, _Allocator) + -> flat_map< + __range_key_type<_Range>, + __range_mapped_type<_Range>, + less<__range_key_type<_Range>>, + vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>, + vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_map(initializer_list>, _Compare = _Compare()) -> flat_map<_Key, _Tp, _Compare>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_map(sorted_unique_t, initializer_list>, _Compare = _Compare()) -> 
flat_map<_Key, _Tp, _Compare>; + +template +struct uses_allocator, _Allocator> + : bool_constant && uses_allocator_v<_MappedContainer, _Allocator>> {}; + +template +_LIBCPP_HIDE_FROM_ABI typename flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type +erase_if(flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_map, _Predicate __pred) { + auto __zv = ranges::views::zip(__flat_map.__containers_.keys, __flat_map.__containers_.values); + auto __first = __zv.begin(); + auto __last = __zv.end(); + auto __guard = std::__make_exception_guard([&] { __flat_map.clear(); }); + auto __it = std::remove_if(__first, __last, [&](auto&& __zipped) -> bool { + using _Ref = typename flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::const_reference; + return __pred(_Ref(std::get<0>(__zipped), std::get<1>(__zipped))); + }); + auto __res = __last - __it; + auto __offset = __it - __first; + + const auto __erase_container = [&](auto& __cont) { __cont.erase(__cont.begin() + __offset, __cont.end()); }; + + __erase_container(__flat_map.__containers_.keys); + __erase_container(__flat_map.__containers_.values); + + __guard.__complete(); + return __res; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FLAT_MAP_FLAT_MAP_H diff --git a/libcxx/include/__flat_map/sorted_unique.h b/libcxx/include/__flat_map/sorted_unique.h new file mode 100644 index 000000000000000..0189a5ff1d56843 --- /dev/null +++ b/libcxx/include/__flat_map/sorted_unique.h @@ -0,0 +1,31 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H +#define _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +struct sorted_unique_t { + explicit sorted_unique_t() = default; +}; +inline constexpr sorted_unique_t sorted_unique{}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +#endif // _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H diff --git a/libcxx/include/__utility/exception_guard.h b/libcxx/include/__utility/exception_guard.h index a03bd7e8f352277..00b835d3e2a2fc3 100644 --- a/libcxx/include/__utility/exception_guard.h +++ b/libcxx/include/__utility/exception_guard.h @@ -137,6 +137,12 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __exception_guard<_Rollback> __make_exce return __exception_guard<_Rollback>(std::move(__rollback)); } +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __exception_guard_exceptions<_Rollback> +__make_scope_guard(_Rollback __rollback) { + return __exception_guard_exceptions<_Rollback>(std::move(__rollback)); +} + _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/libcxx/include/flat_map b/libcxx/include/flat_map new file mode 100644 index 000000000000000..15d79dd1ddca34b --- /dev/null +++ b/libcxx/include/flat_map @@ -0,0 +1,54 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_FLAT_MAP +#define _LIBCPP_FLAT_MAP + +/* + Header synopsis + +#include // see [compare.syn] +#include // see [initializer.list.syn] + +namespace std { + // [flat.map], class template flat_map + template, + class KeyContainer = vector, class MappedContainer = vector> + class flat_map; + + struct sorted_unique_t { explicit sorted_unique_t() = default; }; + inline constexpr sorted_unique_t sorted_unique{}; + + template + struct uses_allocator, + Allocator>; + + // [flat.map.erasure], erasure for flat_map + template + typename flat_map::size_type + erase_if(flat_map& c, Predicate pred); +*/ + +#include <__assert> // all public C++ headers provide the assertion handler +#include <__config> +#include <__flat_map/flat_map.h> +#include <__flat_map/sorted_unique.h> +#include + +// standard required includes +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#endif // _LIBCPP_FLAT_MAP diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 05d08cfbd7cd294..c3561590e06d8a7 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1222,6 +1222,14 @@ module std [system] { export * } + module flat_map { + module flat_map { header "__flat_map/flat_map.h" } + module sorted_unique { header "__flat_map/sorted_unique.h" } + + header "flat_map" + export * + } + module format { module buffer { header "__format/buffer.h" } module concepts { header "__format/concepts.h" } diff --git a/libcxx/modules/std.compat.cppm.in b/libcxx/modules/std.compat.cppm.in index 797b413f68e2724..fbc2c7d94cfabb3 100644 --- a/libcxx/modules/std.compat.cppm.in +++ b/libcxx/modules/std.compat.cppm.in @@ -53,9 +53,6 @@ module; # if __has_include() # error "please update the header information for in headers_not_available in 
utils/libcxx/header_information.py" # endif // __has_include() -# if __has_include() -# error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" -# endif // __has_include() # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() diff --git a/libcxx/modules/std.cppm.in b/libcxx/modules/std.cppm.in index 64ed8d4088cc018..b4889e5a69e49bd 100644 --- a/libcxx/modules/std.cppm.in +++ b/libcxx/modules/std.cppm.in @@ -64,6 +64,7 @@ module; #include #include #include +#include #include #include #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) @@ -161,9 +162,6 @@ module; # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() -# if __has_include() -# error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" -# endif // __has_include() # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() diff --git a/libcxx/modules/std/flat_map.inc b/libcxx/modules/std/flat_map.inc index 83cd20ad6189460..6a86229bceaba9d 100644 --- a/libcxx/modules/std/flat_map.inc +++ b/libcxx/modules/std/flat_map.inc @@ -8,8 +8,8 @@ //===----------------------------------------------------------------------===// export namespace std { -#if 0 - // [flat.map], class template flat_­map +#if _LIBCPP_STD_VER >= 23 + // [flat.map], class template flat_map using std::flat_map; using std::sorted_unique; @@ -17,15 +17,17 @@ export namespace std { using std::uses_allocator; - // [flat.map.erasure], erasure for flat_­map + // [flat.map.erasure], erasure for flat_map using std::erase_if; - // [flat.multimap], class template flat_­multimap +#endif // _LIBCPP_STD_VER >= 23 +#if 0 + // [flat.multimap], class 
template flat_multimap using std::flat_multimap; using std::sorted_equivalent; using std::sorted_equivalent_t; - // [flat.multimap.erasure], erasure for flat_­multimap + // [flat.multimap.erasure], erasure for flat_multimap #endif } // namespace std diff --git a/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.input_range.pass.cpp b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.input_range.pass.cpp new file mode 100644 index 000000000000000..2db803b53441f1d --- /dev/null +++ b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.input_range.pass.cpp @@ -0,0 +1,66 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: has-unix-headers +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// UNSUPPORTED: libcpp-hardening-mode=none +// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing + +// + +// flat_map(key_container_type , mapped_container_type , const key_compare& __comp = key_compare()) +// flat_map(const key_container_type& , const mapped_container_type& , const _Allocator& ) +// flat_map(const key_container_type& , const mapped_container_type& , const key_compare&, const _Allocator& ) +// void replace(key_container_type&& , mapped_container_type&&) +// + +#include +#include +#include +#include + +#include "check_assertion.h" + +int main(int, char**) { + using M = std::flat_map; + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { M m({1, 2, 3}, {4}); }()), "flat_map keys and mapped containers have different size"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { M m({1, 2, 3}, {4}, std::less{}); }()), "flat_map keys and mapped containers have different size"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector keys{1, 2, 3}; + const std::vector 
values{4}; + const std::allocator alloc{}; + M m(keys, values, alloc); + }()), + "flat_map keys and mapped containers have different size"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector keys{1, 2, 3}; + const std::vector values{4}; + const std::less key_compare{}; + const std::allocator alloc{}; + M m(keys, values, key_compare, alloc); + }()), + "flat_map keys and mapped containers have different size"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::vector keys{1, 2, 3}; + std::vector values{4}; + M m; + m.replace(std::move(keys), std::move(values)); + }()), + "flat_map keys and mapped containers have different size"); + + return 0; +} diff --git a/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.sorted_unique.pass.cpp b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.sorted_unique.pass.cpp new file mode 100644 index 000000000000000..e6bd3f385af9cb6 --- /dev/null +++ b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.sorted_unique.pass.cpp @@ -0,0 +1,225 @@ +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: has-unix-headers +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// UNSUPPORTED: libcpp-hardening-mode=none +// REQUIRES: libcpp-hardening-mode=debug +// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing + +// + +// flat_map(key_container_type , mapped_container_type , const key_compare& __comp = key_compare()) +// flat_map(const key_container_type& , const mapped_container_type& , const _Allocator& ) +// flat_map(const key_container_type& , const mapped_container_type& , const key_compare&, const _Allocator& ) +// void replace(key_container_type&& , mapped_container_type&&) +// + +#include +#include +#include +#include +#include +#include + +#include "check_assertion.h" + +int main(int, char**) { + using M = std::flat_map; + + TEST_LIBCPP_ASSERT_FAILURE(([] { M m(std::sorted_unique, {2, 2, 3}, {4, 5, 6}); }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE(([] { M m(std::sorted_unique, {4, 2, 3}, {4, 5, 6}); }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE(([] { M m(std::sorted_unique, {2, 2, 3}, {4, 5, 6}, std::less{}); }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE(([] { M m(std::sorted_unique, {4, 2, 3}, {4, 5, 6}, std::less{}); }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector keys{2, 2, 3}; + const std::vector values{4, 5, 6}; + const std::allocator alloc{}; + M m(std::sorted_unique, keys, values, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector keys{4, 2, 3}; + const std::vector values{4, 5, 6}; + const std::allocator 
alloc{}; + M m(std::sorted_unique, keys, values, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector keys{2, 2, 3}; + const std::vector values{4, 5, 6}; + const std::allocator alloc{}; + const std::less comp{}; + M m(std::sorted_unique, keys, values, comp, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector keys{4, 2, 3}; + const std::vector values{4, 5, 6}; + const std::allocator alloc{}; + const std::less comp{}; + M m(std::sorted_unique, keys, values, comp, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector> v{{2, 4}, {2, 5}, {3, 6}}; + const std::less comp{}; + M m(std::sorted_unique, v.begin(), v.end(), comp); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector> v{{4, 4}, {2, 5}, {3, 6}}; + const std::less comp{}; + M m(std::sorted_unique, v.begin(), v.end(), comp); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector> v{{2, 4}, {2, 5}, {3, 6}}; + const std::less comp{}; + const std::allocator alloc{}; + M m(std::sorted_unique, v.begin(), v.end(), comp, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector> v{{4, 4}, {2, 5}, {3, 6}}; + const std::less comp{}; + const std::allocator alloc{}; + M m(std::sorted_unique, v.begin(), v.end(), comp, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector> v{{2, 4}, {2, 5}, {3, 6}}; + const std::allocator alloc{}; + M m(std::sorted_unique, v.begin(), v.end(), alloc); + }()), + "Either the 
key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector> v{{4, 4}, {2, 5}, {3, 6}}; + const std::allocator alloc{}; + M m(std::sorted_unique, v.begin(), v.end(), alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::initializer_list> v{{2, 4}, {2, 5}, {3, 6}}; + const std::less comp{}; + M m(std::sorted_unique, v, comp); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::initializer_list> v{{4, 4}, {2, 5}, {3, 6}}; + const std::less comp{}; + M m(std::sorted_unique, v, comp); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::initializer_list> v{{2, 4}, {2, 5}, {3, 6}}; + const std::less comp{}; + const std::allocator alloc{}; + M m(std::sorted_unique, v, comp, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::initializer_list> v{{4, 4}, {2, 5}, {3, 6}}; + const std::less comp{}; + const std::allocator alloc{}; + M m(std::sorted_unique, v, comp, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::initializer_list> v{{2, 4}, {2, 5}, {3, 6}}; + const std::allocator alloc{}; + M m(std::sorted_unique, v, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::initializer_list> v{{4, 4}, {2, 5}, {3, 6}}; + const std::allocator alloc{}; + M m(std::sorted_unique, v, alloc); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector> v{{2, 4}, {2, 5}, {3, 6}}; + M m; + m.insert(std::sorted_unique, v.begin(), v.end()); + }()), + "Either the key container is not sorted or 
it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + const std::vector> v{{4, 4}, {2, 5}, {3, 6}}; + M m; + m.insert(std::sorted_unique, v.begin(), v.end()); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::initializer_list> v{{2, 4}, {2, 5}, {3, 6}}; + M m; + m.insert(std::sorted_unique, v); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::initializer_list> v{{4, 4}, {2, 5}, {3, 6}}; + M m; + m.insert(std::sorted_unique, v); + }()), + "Either the key container is not sorted or it contains duplicates"); + + TEST_LIBCPP_ASSERT_FAILURE( + ([] { + std::vector keys{1, 1, 3}; + std::vector values{4, 5, 6}; + M m; + m.replace(std::move(keys), std::move(values)); + }()), + "Either the key container is not sorted or it contains duplicates"); + return 0; +} diff --git a/libcxx/test/libcxx/containers/containers.adaptors/flat.map/container_stability.pass.cpp b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/container_stability.pass.cpp new file mode 100644 index 000000000000000..0d90c3250061ff4 --- /dev/null +++ b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/container_stability.pass.cpp @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(key_container_type key_cont, mapped_container_type mapped_cont); +// +// libc++ uses stable_sort to ensure that flat_map's behavior matches map's, +// in terms of which duplicate items are kept. +// This tests a conforming extension. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + +struct Mod256 { + bool operator()(int x, int y) const { return (x % 256) < (y % 256); } +}; + +int main(int, char**) { + std::mt19937 randomness; + std::vector values; + std::vector> pairs; + for (int i = 0; i < 200; ++i) { + uint16_t r = randomness(); + values.push_back(r); + pairs.emplace_back(r, r); + } + + { + std::map m(pairs.begin(), pairs.end()); + std::flat_map fm(values, values); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + { + std::map m(pairs.begin(), pairs.end()); + std::flat_map fm(values, values, Mod256()); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + { + std::map m(pairs.begin(), pairs.end()); + std::flat_map fm(values, values, std::allocator()); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + { + std::map m(pairs.begin(), pairs.end()); + std::flat_map fm(values, values, Mod256(), std::allocator()); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + return 0; +} diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 506b5cd02c4495e..2dc84963f0891ee 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -682,6 +682,50 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version +flat_map algorithm +flat_map array +flat_map atomic +flat_map bit +flat_map cctype +flat_map cerrno +flat_map climits +flat_map clocale +flat_map cmath +flat_map compare +flat_map concepts +flat_map cstdarg +flat_map cstddef +flat_map cstdint +flat_map cstdio +flat_map cstdlib +flat_map cstring +flat_map ctime +flat_map cwchar +flat_map cwctype +flat_map exception +flat_map initializer_list +flat_map ios +flat_map iosfwd +flat_map iterator +flat_map limits +flat_map locale +flat_map memory 
+flat_map mutex +flat_map new +flat_map optional +flat_map ratio +flat_map stdexcept +flat_map streambuf +flat_map string +flat_map string_view +flat_map system_error +flat_map tuple +flat_map type_traits +flat_map typeinfo +flat_map utility +flat_map variant +flat_map vector +flat_map version format algorithm format array format atomic diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 506b5cd02c4495e..2dc84963f0891ee 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -682,6 +682,50 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version +flat_map algorithm +flat_map array +flat_map atomic +flat_map bit +flat_map cctype +flat_map cerrno +flat_map climits +flat_map clocale +flat_map cmath +flat_map compare +flat_map concepts +flat_map cstdarg +flat_map cstddef +flat_map cstdint +flat_map cstdio +flat_map cstdlib +flat_map cstring +flat_map ctime +flat_map cwchar +flat_map cwctype +flat_map exception +flat_map initializer_list +flat_map ios +flat_map iosfwd +flat_map iterator +flat_map limits +flat_map locale +flat_map memory +flat_map mutex +flat_map new +flat_map optional +flat_map ratio +flat_map stdexcept +flat_map streambuf +flat_map string +flat_map string_view +flat_map system_error +flat_map tuple +flat_map type_traits +flat_map typeinfo +flat_map utility +flat_map variant +flat_map vector +flat_map version format algorithm format array format atomic diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index 828e1d62c6ec3e8..27e229755735840 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -700,6 +700,51 @@ filesystem typeinfo filesystem utility filesystem variant filesystem version +flat_map algorithm +flat_map array +flat_map atomic +flat_map bit +flat_map cctype 
+flat_map cerrno +flat_map climits +flat_map clocale +flat_map cmath +flat_map compare +flat_map concepts +flat_map cstdarg +flat_map cstddef +flat_map cstdint +flat_map cstdio +flat_map cstdlib +flat_map cstring +flat_map ctime +flat_map cwchar +flat_map cwctype +flat_map exception +flat_map execution +flat_map initializer_list +flat_map ios +flat_map iosfwd +flat_map iterator +flat_map limits +flat_map locale +flat_map memory +flat_map mutex +flat_map new +flat_map optional +flat_map ratio +flat_map stdexcept +flat_map streambuf +flat_map string +flat_map string_view +flat_map system_error +flat_map tuple +flat_map type_traits +flat_map typeinfo +flat_map utility +flat_map variant +flat_map vector +flat_map version format algorithm format array format atomic diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index 0bee6e9beb7af13..b17eb1f2347a86c 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -706,6 +706,50 @@ filesystem utility filesystem variant filesystem vector filesystem version +flat_map algorithm +flat_map array +flat_map atomic +flat_map bit +flat_map cctype +flat_map cerrno +flat_map climits +flat_map clocale +flat_map cmath +flat_map compare +flat_map concepts +flat_map cstdarg +flat_map cstddef +flat_map cstdint +flat_map cstdio +flat_map cstdlib +flat_map cstring +flat_map ctime +flat_map cwchar +flat_map cwctype +flat_map exception +flat_map initializer_list +flat_map ios +flat_map iosfwd +flat_map iterator +flat_map limits +flat_map locale +flat_map memory +flat_map mutex +flat_map new +flat_map optional +flat_map ratio +flat_map stdexcept +flat_map streambuf +flat_map string +flat_map string_view +flat_map system_error +flat_map tuple +flat_map type_traits +flat_map typeinfo +flat_map utility +flat_map variant +flat_map vector +flat_map version format algorithm format array format atomic diff --git 
a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 026c26f3bd98198..9efec327889c1de 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -694,6 +694,50 @@ filesystem utility filesystem variant filesystem vector filesystem version +flat_map algorithm +flat_map array +flat_map atomic +flat_map bit +flat_map cctype +flat_map cerrno +flat_map climits +flat_map clocale +flat_map cmath +flat_map compare +flat_map concepts +flat_map cstdarg +flat_map cstddef +flat_map cstdint +flat_map cstdio +flat_map cstdlib +flat_map cstring +flat_map ctime +flat_map cwchar +flat_map cwctype +flat_map exception +flat_map initializer_list +flat_map ios +flat_map iosfwd +flat_map iterator +flat_map limits +flat_map locale +flat_map memory +flat_map mutex +flat_map new +flat_map optional +flat_map ratio +flat_map stdexcept +flat_map streambuf +flat_map string +flat_map string_view +flat_map system_error +flat_map tuple +flat_map type_traits +flat_map typeinfo +flat_map utility +flat_map variant +flat_map vector +flat_map version format algorithm format array format atomic diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index c3db06f15477693..e17f732663a9b28 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -381,6 +381,31 @@ filesystem string_view filesystem tuple filesystem typeinfo filesystem version +flat_map array +flat_map cctype +flat_map cerrno +flat_map climits +flat_map clocale +flat_map compare +flat_map cstddef +flat_map cstdint +flat_map cstdio +flat_map cstdlib +flat_map cstring +flat_map cwchar +flat_map cwctype +flat_map initializer_list +flat_map iosfwd +flat_map limits +flat_map new +flat_map optional +flat_map stdexcept +flat_map string +flat_map string_view +flat_map tuple +flat_map typeinfo +flat_map vector 
+flat_map version format array format cctype format cerrno diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv index 8d7560344ee541c..c56f5cdfad00727 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -381,6 +381,31 @@ filesystem string_view filesystem tuple filesystem typeinfo filesystem version +flat_map array +flat_map cctype +flat_map cerrno +flat_map climits +flat_map clocale +flat_map compare +flat_map cstddef +flat_map cstdint +flat_map cstdio +flat_map cstdlib +flat_map cstring +flat_map cwchar +flat_map cwctype +flat_map initializer_list +flat_map iosfwd +flat_map limits +flat_map new +flat_map optional +flat_map stdexcept +flat_map string +flat_map string_view +flat_map tuple +flat_map typeinfo +flat_map vector +flat_map version format array format cctype format cerrno diff --git a/libcxx/test/std/containers/container.adaptors/NaiveStaticVector.h b/libcxx/test/std/containers/container.adaptors/NaiveStaticVector.h new file mode 100644 index 000000000000000..61fa3504e34e3a2 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/NaiveStaticVector.h @@ -0,0 +1,94 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SUPPORT_NAIVE_STATIC_VECTOR_H +#define SUPPORT_NAIVE_STATIC_VECTOR_H + +#include +#include +#include "test_iterators.h" +#include "test_macros.h" + +template +struct NaiveStaticVector { + struct CapacityError {}; + + using value_type = T; + using difference_type = short; + using size_type = unsigned short; + using iterator = random_access_iterator; + using const_iterator = random_access_iterator; + + explicit NaiveStaticVector() = default; + template + explicit NaiveStaticVector(It first, It last) { + while (first != last) + insert(*first++); + } + + // Moving-from a NaiveStaticVector leaves the source vector holding moved-from objects. + // This is intentional (the "Naive" in the name). + // Specifically, moving-out-of a sorted+uniqued NaiveStaticVector + // will leave it in a non-sorted+uniqued state. + + NaiveStaticVector(const NaiveStaticVector&) = default; + NaiveStaticVector(NaiveStaticVector&&) = default; // deliberately don't reset size_ + NaiveStaticVector& operator=(const NaiveStaticVector&) = default; + NaiveStaticVector& operator=(NaiveStaticVector&&) = default; + + iterator begin() { return iterator(data_); } + const_iterator begin() const { return const_iterator(data_); } + const_iterator cbegin() const { return const_iterator(data_); } + iterator end() { return begin() + size(); } + const_iterator end() const { return begin() + size(); } + size_type size() const { return size_; } + bool empty() const { return size_ == 0; } + + void clear() { size_ = 0; } + + template + iterator insert(const_iterator pos, It first, It last) { + iterator result = pos - cbegin() + begin(); + while (first != last) { + insert(pos++, *first++); + } + return result; + } + + iterator insert(const_iterator pos, T value) { + if (size_ == N) { + throw CapacityError(); + } + int i = pos - cbegin(); + size_ += 1; + 
std::move_backward(&data_[i], &data_[size_ - 1], &data_[size_]); + data_[i] = std::move(value); + return begin() + i; + } + + template + iterator emplace(const_iterator pos, Args&&... args) { + return insert(pos, T(std::forward(args)...)); + } + + iterator erase(const_iterator first, const_iterator last) { + int i = first - cbegin(); + int j = last - cbegin(); + std::move(&data_[j], &data_[size_], &data_[i]); + size_ -= (last - first); + return begin() + i; + } + + iterator erase(const_iterator pos) { return erase(pos, std::next(pos)); } + +private: + T data_[N]; + std::size_t size_ = 0; +}; + +#endif // SUPPORT_NAIVE_STATIC_VECTOR_H diff --git a/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_unique.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_unique.pass.cpp new file mode 100644 index 000000000000000..c602d2d3d38f794 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_unique.pass.cpp @@ -0,0 +1,44 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// struct sorted_unique_t { explicit sorted_unique_t() = default; }; +// inline constexpr sorted_unique_t sorted_unique{}; + +#include +#include +#include +#include + +template +void implicit_test(T) {} + +template +concept HasImplicitDefaultCtor = requires { implicit_test({}); }; + +static_assert(std::is_default_constructible_v); +static_assert(std::is_trivially_default_constructible_v); +static_assert(!HasImplicitDefaultCtor); + +constexpr bool test() { + { [[maybe_unused]] std::sorted_unique_t s; } + { [[maybe_unused]] std::same_as decltype(auto) s = (std::sorted_unique); } + { [[maybe_unused]] std::same_as decltype(auto) copy = std::sorted_unique; } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at.pass.cpp new file mode 100644 index 000000000000000..d30055bf1701cdf --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at.pass.cpp @@ -0,0 +1,92 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// mapped_type& at(const key_type& k); +// const mapped_type& at(const key_type& k) const; + +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "min_allocator.h" +#include "test_macros.h" + +template +void test() { + using P = std::pair; + P ar[] = { + P(1, 1.5), + P(2, 2.5), + P(3, 3.5), + P(4, 4.5), + P(5, 5.5), + P(7, 7.5), + P(8, 8.5), + }; + const int one = 1; + { + std::flat_map, KeyContainer, ValueContainer> m(ar, ar + sizeof(ar) / sizeof(ar[0])); + ASSERT_SAME_TYPE(decltype(m.at(one)), double&); + assert(m.size() == 7); + assert(m.at(one) == 1.5); + m.at(1) = -1.5; + assert(m.at(1) == -1.5); + assert(m.at(2) == 2.5); + assert(m.at(3) == 3.5); + assert(m.at(4) == 4.5); + assert(m.at(5) == 5.5); +#ifndef TEST_HAS_NO_EXCEPTIONS + try { + TEST_IGNORE_NODISCARD m.at(6); + assert(false); + } catch (std::out_of_range&) { + } +#endif + assert(m.at(7) == 7.5); + assert(m.at(8) == 8.5); + assert(m.size() == 7); + } + { + const std::flat_map, KeyContainer, ValueContainer> m( + ar, ar + sizeof(ar) / sizeof(ar[0])); + ASSERT_SAME_TYPE(decltype(m.at(one)), const double&); + assert(m.size() == 7); + assert(m.at(one) == 1.5); + assert(m.at(2) == 2.5); + assert(m.at(3) == 3.5); + assert(m.at(4) == 4.5); + assert(m.at(5) == 5.5); +#ifndef TEST_HAS_NO_EXCEPTIONS + try { + TEST_IGNORE_NODISCARD m.at(6); + assert(false); + } catch (std::out_of_range&) { + } +#endif + assert(m.at(7) == 7.5); + assert(m.at(8) == 8.5); + assert(m.size() == 7); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at_transparent.pass.cpp 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at_transparent.pass.cpp new file mode 100644 index 000000000000000..13edca915fd005c --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at_transparent.pass.cpp @@ -0,0 +1,111 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template mapped_type& at(const K& x); +// template const mapped_type& at(const K& x) const; + +#include +#include +#include +#include +#include + +#include "../helpers.h" +#include "min_allocator.h" +#include "MinSequenceContainer.h" +#include "test_macros.h" + +// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type. 
+template +concept CanAt = requires(M m, Transparent k) { m.at(k); }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +static_assert(CanAt); +static_assert(CanAt); +static_assert(!CanAt); +static_assert(!CanAt); + +template +void test() { + using P = std::pair; + P ar[] = { + P(1, 1.5), + P(2, 2.5), + P(3, 3.5), + P(4, 4.5), + P(5, 5.5), + P(7, 7.5), + P(8, 8.5), + }; + const Transparent one{1}; + { + std::flat_map m( + ar, ar + sizeof(ar) / sizeof(ar[0])); + ASSERT_SAME_TYPE(decltype(m.at(one)), double&); + assert(m.size() == 7); + assert(m.at(one) == 1.5); + m.at(one) = -1.5; + assert(m.at(Transparent{1}) == -1.5); + assert(m.at(Transparent{2}) == 2.5); + assert(m.at(Transparent{3}) == 3.5); + assert(m.at(Transparent{4}) == 4.5); + assert(m.at(Transparent{5}) == 5.5); +#ifndef TEST_HAS_NO_EXCEPTIONS + try { + TEST_IGNORE_NODISCARD m.at(Transparent{6}); + assert(false); + } catch (std::out_of_range&) { + } +#endif + assert(m.at(Transparent{7}) == 7.5); + assert(m.at(Transparent{8}) == 8.5); + assert(m.size() == 7); + } + { + const std::flat_map m( + ar, ar + sizeof(ar) / sizeof(ar[0])); + ASSERT_SAME_TYPE(decltype(m.at(one)), const double&); + assert(m.size() == 7); + assert(m.at(Transparent{1}) == 1.5); + assert(m.at(Transparent{2}) == 2.5); + assert(m.at(Transparent{3}) == 3.5); + assert(m.at(Transparent{4}) == 4.5); + assert(m.at(Transparent{5}) == 5.5); +#ifndef TEST_HAS_NO_EXCEPTIONS + try { + TEST_IGNORE_NODISCARD m.at(Transparent{6}); + assert(false); + } catch (std::out_of_range&) { + } +#endif + assert(m.at(Transparent{7}) == 7.5); + assert(m.at(Transparent{8}) == 8.5); + assert(m.size() == 7); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + 
m.at(Transparent{3}); + assert(transparent_used); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_key.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_key.pass.cpp new file mode 100644 index 000000000000000..ea2f5d800878a23 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_key.pass.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// mapped_type& operator[](const key_type& k); + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "min_allocator.h" +#include "test_macros.h" + +// Constraints: is_constructible_v is true. 
+template +concept CanIndex = requires(M m, Input k) { m[k]; }; + +static_assert(CanIndex, const int&>); +static_assert(!CanIndex, const int&>); + +template +void test() { + using P = std::pair; + P ar[] = { + P(1, 1.5), + P(2, 2.5), + P(3, 3.5), + P(4, 4.5), + P(5, 5.5), + P(7, 7.5), + P(8, 8.5), + }; + const int one = 1; + std::flat_map, KeyContainer, ValueContainer> m(ar, ar + sizeof(ar) / sizeof(ar[0])); + ASSERT_SAME_TYPE(decltype(m[one]), double&); + assert(m.size() == 7); + assert(m[one] == 1.5); + assert(m.size() == 7); + m[1] = -1.5; + assert(m[1] == -1.5); + assert(m.size() == 7); + assert(m[6] == 0); + assert(m.size() == 8); + m[6] = 6.5; + assert(m[6] == 6.5); + assert(m.size() == 8); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto index_func = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + const typename FlatMap::key_type key = key_arg; + const typename FlatMap::mapped_type value = value_arg; + m[key] = value; + }; + test_emplace_exception_guarantee(index_func); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_rv_key.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_rv_key.pass.cpp new file mode 100644 index 000000000000000..faacc3cfe8f96fa --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_rv_key.pass.cpp @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// mapped_type& operator[](key_type&& k); + +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "MoveOnly.h" +#include "min_allocator.h" + +// Constraints: is_constructible_v is true. +template +concept CanIndex = requires(M m, Input k) { m[k]; }; + +static_assert(CanIndex, int&&>); +static_assert(!CanIndex, int&&>); + +template +void test() { + { + std::flat_map, KeyContainer, ValueContainer> m; + ASSERT_SAME_TYPE(decltype(m[MoveOnly{}]), double&); + assert(m.size() == 0); + assert(m[1] == 0.0); + assert(m.size() == 1); + m[1] = -1.5; + assert(m[1] == -1.5); + assert(m.size() == 1); + assert(m[6] == 0); + assert(m.size() == 2); + m[6] = 6.5; + assert(m[6] == 6.5); + assert(m.size() == 2); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto index_func = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + typename FlatMap::key_type key = key_arg; + const typename FlatMap::mapped_type value = value_arg; + m[std::move(key)] = value; + }; + test_emplace_exception_guarantee(index_func); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_transparent.pass.cpp new file mode 100644 index 000000000000000..24c08464f3158c4 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_transparent.pass.cpp @@ -0,0 +1,107 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template mapped_type& operator[](K&& x); + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// Constraints: +// The qualified-id Compare::is_transparent is valid and denotes a type. +// is_constructible_v is true. +// is_constructible_v is true. +// is_convertible_v and is_convertible_v are both false +template +concept CanIndex = requires(M m, Input k) { m[k]; }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +using TransparentNoDefaultCtrValueMap = std::flat_map; + +static_assert(CanIndex>); +static_assert(!CanIndex>); + +static_assert(!CanIndex>); +static_assert(!CanIndex>); + +static_assert(!CanIndex>); +static_assert(!CanIndex>); + +static_assert(!CanIndex>); +static_assert(!CanIndex>); + +static_assert(!CanIndex); +static_assert(!CanIndex); + +template +void test() { + using P = std::pair; + P ar[] = { + P(1, 1.5), + P(2, 2.5), + P(3, 3.5), + P(4, 4.5), + P(5, 5.5), + P(7, 7.5), + P(8, 8.5), + }; + const ConvertibleTransparent one{1}; + const ConvertibleTransparent six{6}; + { + std::flat_map m( + ar, ar + sizeof(ar) / sizeof(ar[0])); + ASSERT_SAME_TYPE(decltype(m[one]), double&); + assert(m.size() == 7); + assert(m[one] == 1.5); + assert(m.size() == 7); + m[one] = -1.5; + assert(m[one] == -1.5); + assert(m.size() == 7); + assert(m[six] == 0); + assert(m.size() == 8); + m[six] = 6.5; + assert(m[six] == 6.5); + assert(m.size() == 8); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + { + bool transparent_used = false; + 
TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + m[ConvertibleTransparent{3}]; + assert(transparent_used); + } + { + auto index_func = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + using Key = typename FlatMap::key_type; + const typename FlatMap::mapped_type value = value_arg; + m[ConvertibleTransparent{key_arg}] = value; + }; + test_emplace_exception_guarantee(index_func); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.pass.cpp new file mode 100644 index 000000000000000..5ecc2cf7c917bd2 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.pass.cpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// [[nodiscard]] bool empty() const noexcept; + +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m; + ASSERT_SAME_TYPE(decltype(m.empty()), bool); + ASSERT_NOEXCEPT(m.empty()); + assert(m.empty()); + assert(std::as_const(m).empty()); + m = {{1, 1.0}}; + assert(!m.empty()); + m.clear(); + assert(m.empty()); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.verify.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.verify.cpp new file mode 100644 index 000000000000000..cc8016182dcb664 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.verify.cpp @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// [[nodiscard]] bool empty() const noexcept; + +#include + +#include "test_macros.h" + +int main(int, char**) { + std::flat_map c; + c.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/max_size.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/max_size.pass.cpp new file mode 100644 index 000000000000000..87acdfd2cf6250a --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/max_size.pass.cpp @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// size_type max_size() const noexcept; + +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_allocator.h" +#include "test_macros.h" + +int main(int, char**) { + { + using A1 = limited_allocator; + using A2 = limited_allocator; + using C = std::flat_map, std::vector, std::vector>; + ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t); + ASSERT_SAME_TYPE(C::size_type, std::size_t); + const C c; + ASSERT_NOEXCEPT(c.max_size()); + ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type); + assert(c.max_size() <= 10); + LIBCPP_ASSERT(c.max_size() == 10); + } + { + using A1 = limited_allocator; + using A2 = limited_allocator; + using C = std::flat_map, std::vector, std::vector>; + ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t); + ASSERT_SAME_TYPE(C::size_type, std::size_t); + const C c; + ASSERT_NOEXCEPT(c.max_size()); + ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type); + assert(c.max_size() <= 10); + LIBCPP_ASSERT(c.max_size() == 10); + } + { + using A = limited_allocator; + using C = std::flat_map, std::vector, std::vector>; + ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t); + ASSERT_SAME_TYPE(C::size_type, std::size_t); + const C::size_type max_dist = static_cast(std::numeric_limits::max()); + const C c; + ASSERT_NOEXCEPT(c.max_size()); + ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type); + assert(c.max_size() <= max_dist); + LIBCPP_ASSERT(c.max_size() == max_dist); + } + { + typedef std::flat_map C; + ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t); + ASSERT_SAME_TYPE(C::size_type, std::size_t); + const C::size_type max_dist = static_cast(std::numeric_limits::max()); + const C c; + ASSERT_NOEXCEPT(c.max_size()); + ASSERT_SAME_TYPE(decltype(c.max_size()), 
C::size_type); + assert(c.max_size() <= max_dist); + assert(c.max_size() <= alloc_max_size(std::allocator())); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/size.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/size.pass.cpp new file mode 100644 index 000000000000000..957a860450091f9 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/size.pass.cpp @@ -0,0 +1,65 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// size_type size() const noexcept; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using M = std::flat_map, KeyContainer, ValueContainer>; + { + const M m = {{1, 'a'}, {1, 'b'}, {4, 'd'}, {5, 'e'}, {5, 'h'}}; + ASSERT_SAME_TYPE(decltype(m.size()), std::size_t); + ASSERT_NOEXCEPT(m.size()); + assert(m.size() == 3); + } + { + const M m = {{1, 'a'}}; + ASSERT_SAME_TYPE(decltype(m.size()), std::size_t); + ASSERT_NOEXCEPT(m.size()); + assert(m.size() == 1); + } + { + const M m; + ASSERT_SAME_TYPE(decltype(m.size()), std::size_t); + ASSERT_NOEXCEPT(m.size()); + assert(m.size() == 0); + } + { + M m; + std::size_t s = 1000000; + for (auto i = 0u; i < s; ++i) { + m.emplace(i, 'a'); + } + ASSERT_SAME_TYPE(decltype(m.size()), std::size_t); + ASSERT_NOEXCEPT(m.size()); + assert(m.size() == s); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); 
+ + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/alloc.pass.cpp new file mode 100644 index 000000000000000..3f8d2ed332d6b32 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/alloc.pass.cpp @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// explicit flat_map(const Allocator& a); + +#include +#include +#include +#include + +#include "test_macros.h" +#include "test_allocator.h" +#include "../../../test_compare.h" + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. 
+ + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + // explicit + using M = + std::flat_map, + std::vector>, + std::vector>>; + + static_assert(std::is_constructible_v>); + static_assert(!std::is_convertible_v, M>); + } + { + using A = test_allocator; + using M = + std::flat_map, + std::vector>, + std::vector>>; + M m(A(0, 5)); + assert(m.empty()); + assert(m.begin() == m.end()); + assert(m.keys().get_allocator().get_id() == 5); + assert(m.values().get_allocator().get_id() == 5); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/assign_initializer_list.pass.cpp new file mode 100644 index 000000000000000..06bde71e79941e8 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/assign_initializer_list.pass.cpp @@ -0,0 +1,59 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map& operator=(initializer_list il); + +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" +#include "test_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + { + M m = {{8, 8}, {10, 10}}; + assert(m.size() == 2); + m = {{3, 0}, {1, 0}, {2, 0}, {2, 1}, {3, 1}, {4, 0}, {3, 2}, {5, 0}, {6, 0}, {5, 1}}; + std::pair expected[] = {{1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 0}, {6, 0}}; + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + } + { + M m = {{10, 1}, {8, 1}}; + assert(m.size() == 2); + m = {{3, 2}}; + std::pair expected[] = {{3, 2}}; + assert(std::ranges::equal(m, expected)); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/compare.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/compare.pass.cpp new file mode 100644 index 000000000000000..40a1710f55e4220 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/compare.pass.cpp @@ -0,0 +1,93 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// explicit flat_map(const key_compare& comp); +// template +// flat_map(const key_compare& comp, const Alloc& a); + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "../../../test_compare.h" +#include "test_allocator.h" + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. + + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using M1 = std::flat_map, std::vector>; + using M2 = std::flat_map, std::vector>; + using M3 = std::flat_map, std::vector>; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + using C = test_less; + auto m = std::flat_map(C(3)); + assert(m.empty()); + assert(m.begin() == m.end()); + assert(m.key_comp() == C(3)); + } + { + // The one-argument ctor is explicit. 
+ using C = test_less; + static_assert(std::is_constructible_v, C>); + static_assert(!std::is_convertible_v>); + + static_assert(std::is_constructible_v, std::less>); + static_assert(!std::is_convertible_v, std::flat_map>); + } + { + using C = test_less; + using A1 = test_allocator; + using A2 = test_allocator; + auto m = std::flat_map, std::vector>(C(4), A1(5)); + assert(m.empty()); + assert(m.begin() == m.end()); + assert(m.key_comp() == C(4)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // explicit(false) + using C = test_less; + using A1 = test_allocator; + using A2 = test_allocator; + std::flat_map, std::deque> m = {C(4), A1(5)}; + assert(m.empty()); + assert(m.begin() == m.end()); + assert(m.key_comp() == C(4)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // If an allocator is given, it must be usable by both containers. + using A = test_allocator; + using M = std::flat_map, std::vector, std::vector>; + static_assert(std::is_constructible_v>); + static_assert(!std::is_constructible_v, std::allocator>); + static_assert(!std::is_constructible_v, A>); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/containers.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/containers.pass.cpp new file mode 100644 index 000000000000000..812e2c3e4f02a82 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/containers.pass.cpp @@ -0,0 +1,184 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(key_container_type key_cont, mapped_container_type mapped_cont, +// const key_compare& comp = key_compare()); +// template +// flat_map(const key_container_type& key_cont, const mapped_container_type& mapped_cont, +// const Allocator& a); +// template +// flat_map(const key_container_type& key_cont, const mapped_container_type& mapped_cont, +// const key_compare& comp, const Alloc& a); + +#include +#include +#include +#include +#include + +#include "min_allocator.h" +#include "MoveOnly.h" +#include "test_allocator.h" +#include "test_iterators.h" +#include "test_macros.h" +#include "../../../test_compare.h" + +struct P { + int first; + int second; + template + bool operator==(const std::pair& rhs) const { + return MoveOnly(first) == rhs.first && MoveOnly(second) == rhs.second; + } +}; + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. 
+ + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + // flat_map(key_container_type , mapped_container_type) + using M = std::flat_map; + std::vector ks = {1, 1, 1, 2, 2, 3, 2, 3, 3}; + std::vector vs = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto m = M(ks, vs); + assert((m.keys() == std::vector{1, 2, 3})); + LIBCPP_ASSERT((m.values() == std::vector{1, 4, 6})); + + // explicit(false) + M m2 = {ks, vs}; + assert(m2 == m); + + m = M(std::move(ks), std::move(vs)); + assert(ks.empty()); // it was moved-from + assert(vs.empty()); // it was moved-from + assert((m.keys() == std::vector{1, 2, 3})); + LIBCPP_ASSERT((m.values() == std::vector{1, 4, 6})); + } + { + // flat_map(key_container_type , mapped_container_type) + // move-only + P expected[] = {{3, 2}, {2, 1}, {1, 3}}; + using Ks = std::deque>; + using Vs = std::vector>; + using M = std::flat_map, Ks, Vs>; + Ks ks = {1, 3, 2}; + Vs vs; + vs.push_back(3); + vs.push_back(2); + vs.push_back(1); + auto m = M(std::move(ks), std::move(vs)); + assert(ks.empty()); // it was moved-from + assert(vs.empty()); // it was moved-from + assert(std::ranges::equal(m, expected, std::equal_to<>())); + } + { + // flat_map(key_container_type , mapped_container_type) + // container's allocators are used + using A = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + auto ks = std::vector({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5)); + auto vs = std::deque({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6)); + auto m = M(std::move(ks), 
std::move(vs)); + assert(ks.empty()); // it was moved-from + assert(vs.empty()); // it was moved-from + assert((m == M{{1, 1}, {2, 2}, {3, 3}})); + assert(m.keys().get_allocator() == A(5)); + assert(m.values().get_allocator() == A(6)); + } + { + // flat_map(key_container_type , mapped_container_type, key_compare) + using C = test_less; + using M = std::flat_map; + std::vector ks = {1, 1, 1, 2, 2, 3, 2, 3, 3}; + std::vector vs = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto m = M(ks, vs, C(4)); + assert((m.keys() == std::vector{1, 2, 3})); + LIBCPP_ASSERT((m.values() == std::vector{1, 4, 6})); + assert(m.key_comp() == C(4)); + + // explicit(false) + M m2 = {ks, vs, C(4)}; + assert(m2 == m); + assert(m2.key_comp() == C(4)); + } + { + // flat_map(key_container_type , mapped_container_type, const Allocator&) + using A = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + auto ks = std::vector({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5)); + auto vs = std::deque({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6)); + auto m = M(ks, vs, A(4)); // replaces the allocators + assert(!ks.empty()); // it was an lvalue above + assert(!vs.empty()); // it was an lvalue above + assert((m == M{{1, 1}, {2, 2}, {3, 3}})); + assert(m.keys().get_allocator() == A(4)); + assert(m.values().get_allocator() == A(4)); + } + { + // flat_map(key_container_type , mapped_container_type, const Allocator&) + // explicit(false) + using A = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + auto ks = std::vector({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5)); + auto vs = std::deque({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6)); + M m = {ks, vs, A(4)}; // implicit ctor + assert(!ks.empty()); // it was an lvalue above + assert(!vs.empty()); // it was an lvalue above + assert((m == M{{1, 1}, {2, 2}, {3, 3}})); + assert(m.keys().get_allocator() == A(4)); + assert(m.values().get_allocator() == A(4)); + } + { + // flat_map(key_container_type , mapped_container_type, key_compare, const Allocator&) + using C = test_less; + using 
A = test_allocator; + using M = std::flat_map, std::vector>; + std::vector ks = {1, 1, 1, 2, 2, 3, 2, 3, 3}; + std::vector vs = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto m = M(ks, vs, C(4), A(5)); + assert((m.keys() == std::vector{1, 2, 3})); + LIBCPP_ASSERT((m.values() == std::vector{1, 4, 6})); + assert(m.key_comp() == C(4)); + assert(m.keys().get_allocator() == A(5)); + assert(m.values().get_allocator() == A(5)); + + // explicit(false) + M m2 = {ks, vs, C(4), A(5)}; + assert(m2 == m); + assert(m2.key_comp() == C(4)); + assert(m2.keys().get_allocator() == A(5)); + assert(m2.values().get_allocator() == A(5)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy.pass.cpp new file mode 100644 index 000000000000000..fcd0415088c1c96 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy.pass.cpp @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(const flat_map& m); + +#include +#include +#include + +#include "test_macros.h" +#include "../../../test_compare.h" +#include "test_allocator.h" + +int main(int, char**) { + { + using C = test_less; + std::vector> ks({1, 3, 5}, test_allocator(6)); + std::vector> vs({2, 2, 1}, test_allocator(7)); + using M = std::flat_map; + auto mo = M(ks, vs, C(5)); + auto m = mo; + + assert(m.key_comp() == C(5)); + assert(m.keys() == ks); + assert(m.values() == vs); + assert(m.keys().get_allocator() == test_allocator(6)); + assert(m.values().get_allocator() == test_allocator(7)); + + // mo is unchanged + assert(mo.key_comp() == C(5)); + assert(mo.keys() == ks); + assert(mo.values() == vs); + assert(mo.keys().get_allocator() == test_allocator(6)); + assert(mo.values().get_allocator() == test_allocator(7)); + } + { + using C = test_less; + using Ks = std::vector>; + using Vs = std::vector>; + auto ks = Ks({1, 3, 5}, other_allocator(6)); + auto vs = Vs({2, 2, 1}, other_allocator(7)); + using M = std::flat_map; + auto mo = M(Ks(ks, other_allocator(6)), Vs(vs, other_allocator(7)), C(5)); + auto m = mo; + + assert(m.key_comp() == C(5)); + assert(m.keys() == ks); + assert(m.values() == vs); + assert(m.keys().get_allocator() == other_allocator(-2)); + assert(m.values().get_allocator() == other_allocator(-2)); + + // mo is unchanged + assert(mo.key_comp() == C(5)); + assert(mo.keys() == ks); + assert(mo.values() == vs); + assert(mo.keys().get_allocator() == other_allocator(6)); + assert(mo.values().get_allocator() == other_allocator(7)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_alloc.pass.cpp new file mode 100644 
index 000000000000000..cbda6ea853268af --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_alloc.pass.cpp @@ -0,0 +1,67 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(const flat_map&, const allocator_type&); + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "../../../test_compare.h" +#include "test_allocator.h" + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. + + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + using C = test_less; + std::vector> ks({1, 3, 5}, test_allocator(6)); + std::vector> vs({2, 2, 1}, test_allocator(7)); + using M = std::flat_map; + auto mo = M(ks, vs, C(5)); + auto m = M(mo, test_allocator(3)); + + assert(m.key_comp() == C(5)); + assert(m.keys() == ks); + assert(m.values() == vs); + assert(m.keys().get_allocator() == test_allocator(3)); + assert(m.values().get_allocator() == test_allocator(3)); + + // mo is unchanged + assert(mo.key_comp() == C(5)); + assert(mo.keys() == ks); + assert(mo.values() == vs); + assert(mo.keys().get_allocator() == test_allocator(6)); + 
assert(mo.values().get_allocator() == test_allocator(7)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.addressof.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.addressof.compile.pass.cpp new file mode 100644 index 000000000000000..e9b752d5eb12b04 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.addressof.compile.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map& operator=(const flat_map& s); + +// Validate whether the container can be copy-assigned (move-assigned, swapped) +// with an ADL-hijacking operator& + +#include +#include + +#include "test_macros.h" +#include "operator_hijacker.h" + +void test() { + std::flat_map so; + std::flat_map s; + s = so; + s = std::move(so); + swap(s, so); +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.pass.cpp new file mode 100644 index 000000000000000..4f9797d5bf810af --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.pass.cpp @@ -0,0 +1,92 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map& operator=(const flat_map& m); + +#include +#include +#include +#include + +#include "test_macros.h" +#include "../../../test_compare.h" +#include "test_allocator.h" + +int main(int, char**) { + { + // test_allocator is not propagated + using C = test_less; + std::vector> ks({1, 3, 5}, test_allocator(6)); + std::vector> vs({2, 2, 1}, test_allocator(7)); + using M = std::flat_map; + auto mo = M(ks, vs, C(5)); + auto m = M({{3, 3}, {4, 4}, {5, 5}}, C(3), test_allocator(2)); + m = mo; + + assert(m.key_comp() == C(5)); + assert(m.keys() == ks); + assert(m.values() == vs); + assert(m.keys().get_allocator() == test_allocator(2)); + assert(m.values().get_allocator() == test_allocator(2)); + + // mo is unchanged + assert(mo.key_comp() == C(5)); + assert(mo.keys() == ks); + assert(mo.values() == vs); + assert(mo.keys().get_allocator() == test_allocator(6)); + assert(mo.values().get_allocator() == test_allocator(7)); + } + { + // other_allocator is propagated + using C = test_less; + using Ks = std::vector>; + using Vs = std::vector>; + auto ks = Ks({1, 3, 5}, other_allocator(6)); + auto vs = Vs({2, 2, 1}, other_allocator(7)); + using M = std::flat_map; + auto mo = M(Ks(ks, other_allocator(6)), Vs(vs, other_allocator(7)), C(5)); + auto m = M({{3, 3}, {4, 4}, {5, 5}}, C(3), other_allocator(2)); + m = mo; + + assert(m.key_comp() == C(5)); + assert(m.keys() == ks); + assert(m.values() == vs); + assert(m.keys().get_allocator() == other_allocator(6)); + assert(m.values().get_allocator() == other_allocator(7)); + + // mo is unchanged + assert(mo.key_comp() == C(5)); + assert(mo.keys() == ks); + assert(mo.values() == vs); + assert(mo.keys().get_allocator() == other_allocator(6)); + assert(mo.values().get_allocator() == other_allocator(7)); + } + { + // comparator is 
copied and invariant is preserved + using M = std::flat_map>; + M mo = M({{1, 2}, {3, 4}}, std::less()); + M m = M({{1, 2}, {3, 4}}, std::greater()); + assert(m.key_comp()(2, 1) == true); + assert(m != mo); + m = mo; + assert(m.key_comp()(2, 1) == false); + assert(m == mo); + } + { + // self-assignment + using M = std::flat_map; + M m = {{1, 2}, {3, 4}}; + m = static_cast(m); + assert((m == M{{1, 2}, {3, 4}})); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.pass.cpp new file mode 100644 index 000000000000000..d01bee9aae9c086 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.pass.cpp @@ -0,0 +1,342 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "deduction_guides_sfinae_checks.h" +#include "test_allocator.h" + +using P = std::pair; +using PC = std::pair; + +void test_copy() { + { + std::flat_map source = {{1, 2}, {2, 3}}; + std::flat_map s(source); + ASSERT_SAME_TYPE(decltype(s), decltype(source)); + assert(s == source); + } + { + std::flat_map> source = {{1, 2}, {2, 3}}; + std::flat_map s{source}; // braces instead of parens + ASSERT_SAME_TYPE(decltype(s), decltype(source)); + assert(s == source); + } + { + std::flat_map> source = {{1, 2}, {2, 3}}; + std::flat_map s(source, std::allocator()); + ASSERT_SAME_TYPE(decltype(s), decltype(source)); + assert(s == source); + } +} + +void test_containers() { + std::deque> ks({1, 2, 1, INT_MAX, 3}, test_allocator(0, 42)); + std::deque> vs({1, 2, 1, 4, 5}, test_allocator(0, 43)); + std::deque> sorted_ks({1, 2, 3, INT_MAX}, test_allocator(0, 42)); + std::deque> sorted_vs({1, 2, 5, 4}, test_allocator(0, 43)); + const std::pair expected[] = {{1, 1}, {2, 2}, {3, 5}, {INT_MAX, 4}}; + { + std::flat_map s(ks, vs); + + ASSERT_SAME_TYPE(decltype(s), std::flat_map, decltype(ks), decltype(vs)>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 42); + assert(s.values().get_allocator().get_id() == 43); + } + { + std::flat_map s(std::sorted_unique, sorted_ks, sorted_vs); + + ASSERT_SAME_TYPE(decltype(s), std::flat_map, decltype(ks), decltype(vs)>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 42); + assert(s.values().get_allocator().get_id() == 43); + } + { + std::flat_map s(ks, vs, test_allocator(0, 44)); + + ASSERT_SAME_TYPE(decltype(s), std::flat_map, 
decltype(ks), decltype(vs)>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 44); + assert(s.values().get_allocator().get_id() == 44); + } + { + std::flat_map s(std::sorted_unique, sorted_ks, sorted_vs, test_allocator(0, 44)); + + ASSERT_SAME_TYPE(decltype(s), std::flat_map, decltype(ks), decltype(vs)>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 44); + assert(s.values().get_allocator().get_id() == 44); + } +} + +void test_containers_compare() { + std::deque> ks({1, 2, 1, INT_MAX, 3}, test_allocator(0, 42)); + std::deque> vs({1, 2, 1, 4, 5}, test_allocator(0, 43)); + std::deque> sorted_ks({INT_MAX, 3, 2, 1}, test_allocator(0, 42)); + std::deque> sorted_vs({4, 5, 2, 1}, test_allocator(0, 43)); + const std::pair expected[] = {{INT_MAX, 4}, {3, 5}, {2, 2}, {1, 1}}; + { + std::flat_map s(ks, vs, std::greater()); + + ASSERT_SAME_TYPE(decltype(s), std::flat_map, decltype(ks), decltype(vs)>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 42); + assert(s.values().get_allocator().get_id() == 43); + } + { + std::flat_map s(std::sorted_unique, sorted_ks, sorted_vs, std::greater()); + + ASSERT_SAME_TYPE(decltype(s), std::flat_map, decltype(ks), decltype(vs)>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 42); + assert(s.values().get_allocator().get_id() == 43); + } + { + std::flat_map s(ks, vs, std::greater(), test_allocator(0, 44)); + + ASSERT_SAME_TYPE(decltype(s), std::flat_map, decltype(ks), decltype(vs)>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 44); + assert(s.values().get_allocator().get_id() == 44); + } + { + std::flat_map s(std::sorted_unique, sorted_ks, sorted_vs, std::greater(), test_allocator(0, 44)); + + ASSERT_SAME_TYPE(decltype(s), std::flat_map, decltype(ks), decltype(vs)>); + assert(std::ranges::equal(s, expected)); + 
assert(s.keys().get_allocator().get_id() == 44); + assert(s.values().get_allocator().get_id() == 44); + } +} + +void test_iter_iter() { + const P arr[] = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}; + const P sorted_arr[] = {{1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}}; + const PC arrc[] = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}; + const PC sorted_arrc[] = {{1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}}; + { + std::flat_map m(std::begin(arr), std::end(arr)); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map m(std::begin(arrc), std::end(arrc)); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map m(std::sorted_unique, std::begin(sorted_arr), std::end(sorted_arr)); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map m(std::sorted_unique, std::begin(sorted_arrc), std::end(sorted_arrc)); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map mo; + std::flat_map m(mo.begin(), mo.end()); + ASSERT_SAME_TYPE(decltype(m), decltype(mo)); + } + { + std::flat_map mo; + std::flat_map m(mo.cbegin(), mo.cend()); + ASSERT_SAME_TYPE(decltype(m), decltype(mo)); + } +} + +void test_iter_iter_compare() { + const P arr[] = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}; + const P sorted_arr[] = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}}; + const PC arrc[] = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}; + const PC sorted_arrc[] = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}}; + using C = std::greater; + { + std::flat_map m(std::begin(arr), std::end(arr), C()); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map m(std::begin(arrc), std::end(arrc), C()); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, 
sorted_arr)); + } + { + std::flat_map m(std::sorted_unique, std::begin(sorted_arr), std::end(sorted_arr), C()); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map m(std::sorted_unique, std::begin(sorted_arrc), std::end(sorted_arrc), C()); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map mo; + std::flat_map m(mo.begin(), mo.end(), C()); + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + } + { + std::flat_map mo; + std::flat_map m(mo.cbegin(), mo.cend(), C()); + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + } +} + +void test_initializer_list() { + const P sorted_arr[] = {{1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}}; + { + std::flat_map m{std::pair{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}; + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map m(std::sorted_unique, {std::pair{1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}}); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } +} + +void test_initializer_list_compare() { + const P sorted_arr[] = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}}; + using C = std::greater; + { + std::flat_map m({std::pair{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}, C()); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } + { + std::flat_map m(std::sorted_unique, {std::pair{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}}, C()); + + ASSERT_SAME_TYPE(decltype(m), std::flat_map); + assert(std::ranges::equal(m, sorted_arr)); + } +} + +void test_from_range() { + std::list> r = {{1, 1}, {2, 2}, {1, 1}, {INT_MAX, 4}, {3, 5}}; + const std::pair expected[] = {{1, 1}, {2, 2}, {3, 5}, {INT_MAX, 4}}; + { + std::flat_map s(std::from_range, r); + ASSERT_SAME_TYPE(decltype(s), std::flat_map>); + assert(std::ranges::equal(s, expected)); + } + { + std::flat_map 
s(std::from_range, r, test_allocator(0, 42)); + ASSERT_SAME_TYPE( + decltype(s), + std::flat_map, + std::vector>, + std::vector>>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 42); + assert(s.values().get_allocator().get_id() == 42); + } +} + +void test_from_range_compare() { + std::list> r = {{1, 1}, {2, 2}, {1, 1}, {INT_MAX, 4}, {3, 5}}; + const std::pair expected[] = {{INT_MAX, 4}, {3, 5}, {2, 2}, {1, 1}}; + { + std::flat_map s(std::from_range, r, std::greater()); + ASSERT_SAME_TYPE(decltype(s), std::flat_map>); + assert(std::ranges::equal(s, expected)); + } + { + std::flat_map s(std::from_range, r, std::greater(), test_allocator(0, 42)); + ASSERT_SAME_TYPE( + decltype(s), + std::flat_map, + std::vector>, + std::vector>>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().get_id() == 42); + assert(s.values().get_allocator().get_id() == 42); + } +} + +int main(int, char**) { + // Each test function also tests the sorted_unique-prefixed and allocator-suffixed overloads. 
+ test_copy(); + test_containers(); + test_containers_compare(); + test_iter_iter(); + test_iter_iter_compare(); + test_initializer_list(); + test_initializer_list_compare(); + test_from_range(); + test_from_range_compare(); + + AssociativeContainerDeductionGuidesSfinaeAway>(); + { + std::flat_map s = {std::make_pair(1, 'a')}; // flat_map(initializer_list>) + ASSERT_SAME_TYPE(decltype(s), std::flat_map); + assert(s.size() == 1); + } + { + using M = std::flat_map; + M m; + std::flat_map s = {std::make_pair(m, m)}; // flat_map(initializer_list>) + ASSERT_SAME_TYPE(decltype(s), std::flat_map); + assert(s.size() == 1); + assert(s[m] == m); + } + + { + std::pair source[3] = {{1, 1}, {2, 2}, {3, 3}}; + std::flat_map s = {source, source + 3}; // flat_map(InputIterator, InputIterator) + ASSERT_SAME_TYPE(decltype(s), std::flat_map); + assert(s.size() == 3); + } + { + std::pair source[3] = {{1, 1}, {2, 2}, {3, 3}}; + std::flat_map s{source, source + 3}; // flat_map(InputIterator, InputIterator) + ASSERT_SAME_TYPE(decltype(s), std::flat_map); + assert(s.size() == 3); + } + { + std::pair source[3] = {{1, 1}, {2, 2}, {3, 3}}; + std::flat_map s{std::sorted_unique, source, source + 3}; // flat_map(sorted_unique_t, InputIterator, InputIterator) + static_assert(std::is_same_v>); + assert(s.size() == 3); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.verify.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.verify.cpp new file mode 100644 index 000000000000000..08244f01cb24e13 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.verify.cpp @@ -0,0 +1,97 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// Test CTAD on cases where deduction should fail. + +#include +#include +#include +#include +#include + +struct NotAnAllocator { + friend bool operator<(NotAnAllocator, NotAnAllocator) { return false; } +}; + +using P = std::pair; +using PC = std::pair; + +void test() { + { + // cannot deduce Key and T from just (KeyContainer), even if it's a container of pairs + std::vector> v; + std::flat_map s(v); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce Key and T from just (KeyContainer, Allocator) + std::vector v; + std::flat_map s(v, std::allocator>()); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce Key and T from nothing + std::flat_map m; + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce Key and T from just (Compare) + std::flat_map m(std::less{}); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce Key and T from just (Compare, Allocator) + std::flat_map m(std::less{}, std::allocator{}); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce Key and T from just (Allocator) + std::flat_map m(std::allocator{}); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot convert from some arbitrary unrelated type + NotAnAllocator a; + std::flat_map m(a); + // 
expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce that the inner braced things should be std::pair and not something else + std::flat_map m{{1, 1L}, {2, 2L}, {3, 3L}}; + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce that the inner braced things should be std::pair and not something else + std::flat_map m({{1, 1L}, {2, 2L}, {3, 3L}}, std::less()); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce that the inner braced things should be std::pair and not something else + std::flat_map m({{1, 1L}, {2, 2L}, {3, 3L}}, std::less(), std::allocator()); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // cannot deduce that the inner braced things should be std::pair and not something else + std::flat_map m({{1, 1L}, {2, 2L}, {3, 3L}}, std::allocator()); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // since we have parens, not braces, this deliberately does not find the initializer_list constructor + std::flat_map m(P{1, 1L}); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } + { + // since we have parens, not braces, this deliberately does not find the initializer_list constructor + std::flat_map m(PC{1, 1L}); + // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}} + } +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct_pmr.pass.cpp 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct_pmr.pass.cpp new file mode 100644 index 000000000000000..11c18ac13c76a92 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct_pmr.pass.cpp @@ -0,0 +1,106 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// UNSUPPORTED: availability-pmr-missing + +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_allocator.h" + +using P = std::pair; +using PC = std::pair; + +void test_containers() { + std::deque> ks({1, 2, 1, INT_MAX, 3}, test_allocator(0, 42)); + std::deque> vs({1, 2, 1, 4, 5}, test_allocator(0, 43)); + std::deque> sorted_ks({1, 2, 3, INT_MAX}, test_allocator(0, 42)); + std::deque> sorted_vs({1, 2, 5, 4}, test_allocator(0, 43)); + const std::pair expected[] = {{1, 1}, {2, 2}, {3, 5}, {INT_MAX, 4}}; + { + std::pmr::monotonic_buffer_resource mr; + std::pmr::monotonic_buffer_resource mr2; + std::pmr::deque pks(ks.begin(), ks.end(), &mr); + std::pmr::deque pvs(vs.begin(), vs.end(), &mr); + std::flat_map s(std::move(pks), std::move(pvs), &mr2); + + ASSERT_SAME_TYPE( + decltype(s), std::flat_map, std::pmr::deque, std::pmr::deque>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().resource() == &mr2); + assert(s.values().get_allocator().resource() == &mr2); + } + { + std::pmr::monotonic_buffer_resource mr; + std::pmr::monotonic_buffer_resource mr2; + std::pmr::deque pks(sorted_ks.begin(), sorted_ks.end(), &mr); + std::pmr::deque 
pvs(sorted_vs.begin(), sorted_vs.end(), &mr); + std::flat_map s(std::sorted_unique, std::move(pks), std::move(pvs), &mr2); + + ASSERT_SAME_TYPE( + decltype(s), std::flat_map, std::pmr::deque, std::pmr::deque>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().resource() == &mr2); + assert(s.values().get_allocator().resource() == &mr2); + } +} + +void test_containers_compare() { + std::deque> ks({1, 2, 1, INT_MAX, 3}, test_allocator(0, 42)); + std::deque> vs({1, 2, 1, 4, 5}, test_allocator(0, 43)); + std::deque> sorted_ks({INT_MAX, 3, 2, 1}, test_allocator(0, 42)); + std::deque> sorted_vs({4, 5, 2, 1}, test_allocator(0, 43)); + const std::pair expected[] = {{INT_MAX, 4}, {3, 5}, {2, 2}, {1, 1}}; + { + std::pmr::monotonic_buffer_resource mr; + std::pmr::monotonic_buffer_resource mr2; + std::pmr::deque pks(ks.begin(), ks.end(), &mr); + std::pmr::deque pvs(vs.begin(), vs.end(), &mr); + std::flat_map s(std::move(pks), std::move(pvs), std::greater(), &mr2); + + ASSERT_SAME_TYPE( + decltype(s), std::flat_map, std::pmr::deque, std::pmr::deque>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().resource() == &mr2); + assert(s.values().get_allocator().resource() == &mr2); + } + { + std::pmr::monotonic_buffer_resource mr; + std::pmr::monotonic_buffer_resource mr2; + std::pmr::deque pks(sorted_ks.begin(), sorted_ks.end(), &mr); + std::pmr::deque pvs(sorted_vs.begin(), sorted_vs.end(), &mr); + std::flat_map s(std::sorted_unique, std::move(pks), std::move(pvs), std::greater(), &mr2); + + ASSERT_SAME_TYPE( + decltype(s), std::flat_map, std::pmr::deque, std::pmr::deque>); + assert(std::ranges::equal(s, expected)); + assert(s.keys().get_allocator().resource() == &mr2); + assert(s.values().get_allocator().resource() == &mr2); + } +} + +int main(int, char**) { + test_containers(); + test_containers_compare(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default.pass.cpp 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default.pass.cpp new file mode 100644 index 000000000000000..c5b94896b92931c --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default.pass.cpp @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(); + +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "min_allocator.h" +#include "test_allocator.h" + +struct DefaultCtableComp { + explicit DefaultCtableComp() { default_constructed_ = true; } + bool operator()(int, int) const { return false; } + bool default_constructed_ = false; +}; + +int main(int, char**) { + { + std::flat_map m; + assert(m.empty()); + } + { + // explicit(false) + std::flat_map m = {}; + assert(m.empty()); + } + { + std::flat_map>> m; + assert(m.empty()); + assert(m.begin() == m.end()); + assert(m.key_comp().default_constructed_); + } + { + using A1 = explicit_allocator; + using A2 = explicit_allocator; + { + std::flat_map, std::vector> m; + assert(m.empty()); + assert(m.key_comp().default_constructed_); + } + { + A1 a1; + std::flat_map, std::vector> m(a1); + assert(m.empty()); + assert(m.key_comp().default_constructed_); + } + } + { + // If an allocator is given, it must be usable by both containers. 
+ using A = test_allocator; + using M = std::flat_map, std::vector, std::vector>; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v>); + static_assert(!std::is_constructible_v); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default_noexcept.pass.cpp new file mode 100644 index 000000000000000..ac24c8a8ac067ee --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default_noexcept.pass.cpp @@ -0,0 +1,57 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map() +// noexcept( +// is_nothrow_default_constructible_v && +// is_nothrow_default_constructible_v && +// is_nothrow_default_constructible_v); + +// This tests a conforming extension + +#include +#include +#include +#include + +#include "test_macros.h" +#include "MoveOnly.h" +#include "test_allocator.h" + +struct ThrowingCtorComp { + ThrowingCtorComp() noexcept(false) {} + bool operator()(const auto&, const auto&) const { return false; } +}; + +int main(int, char**) { +#if defined(_LIBCPP_VERSION) + { + using C = std::flat_map; + static_assert(std::is_nothrow_default_constructible_v); + } + { + using C = std::flat_map, std::vector>>; + static_assert(std::is_nothrow_default_constructible_v); + } +#endif // _LIBCPP_VERSION + { + using C = std::flat_map, std::vector>>; + static_assert(!std::is_nothrow_default_constructible_v); + C c; + } + { + using C = std::flat_map; + 
static_assert(!std::is_nothrow_default_constructible_v); + C c; + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/dtor_noexcept.pass.cpp new file mode 100644 index 000000000000000..e3ab33a55d95bf5 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/dtor_noexcept.pass.cpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// ~flat_map(); + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "MoveOnly.h" +#include "test_allocator.h" + +struct ThrowingDtorComp { + bool operator()(const auto&, const auto&) const; + ~ThrowingDtorComp() noexcept(false); +}; + +int main(int, char**) { + { + using C = std::flat_map; + static_assert(std::is_nothrow_destructible_v); + } + { + using V = std::vector>; + using C = std::flat_map, V, V>; + static_assert(std::is_nothrow_destructible_v); + } + { + using V = std::deque>; + using C = std::flat_map, V, V>; + static_assert(std::is_nothrow_destructible_v); + } +#if defined(_LIBCPP_VERSION) + { + using C = std::flat_map; + static_assert(!std::is_nothrow_destructible_v); + } +#endif // _LIBCPP_VERSION + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/initializer_list.pass.cpp new file mode 100644 index 000000000000000..7a22746845d002b --- /dev/null +++ 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/initializer_list.pass.cpp @@ -0,0 +1,157 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(initializer_list il, const key_compare& comp = key_compare()); +// template +// flat_map(initializer_list il, const Alloc& a); +// template +// flat_map(initializer_list il, const key_compare& comp, const Alloc& a); + +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "min_allocator.h" +#include "test_allocator.h" + +#include "../../../test_compare.h" + +struct DefaultCtableComp { + explicit DefaultCtableComp() { default_constructed_ = true; } + bool operator()(int, int) const { return false; } + bool default_constructed_ = false; +}; + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. 
+ + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + using IL = std::initializer_list>; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + + { + // initializer_list needs to match exactly + using M = std::flat_map; + using C = typename M::key_compare; + static_assert(std::is_constructible_v>>); + static_assert(std::is_constructible_v>, C>); + static_assert(std::is_constructible_v>, C, std::allocator>); + static_assert(std::is_constructible_v>, std::allocator>); + static_assert(!std::is_constructible_v>>); + static_assert(!std::is_constructible_v>, C>); + static_assert( + !std::is_constructible_v>, C, std::allocator>); + static_assert(!std::is_constructible_v>, std::allocator>); + static_assert(!std::is_constructible_v>>); + static_assert(!std::is_constructible_v>, C>); + static_assert( + !std::is_constructible_v>, C, std::allocator>); + static_assert( + !std::is_constructible_v>, std::allocator>); + } + + std::pair expected[] = {{1, 1}, {2, 2}, {3, 3}, {5, 2}}; + { + // flat_map(initializer_list); + using M = std::flat_map; + std::initializer_list> il = {{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}; + M m(il); + assert(std::equal(m.begin(), m.end(), expected, expected + 4)); + } + { + // flat_map(initializer_list); + // explicit(false) + using M = std::flat_map; + M m = {{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}; + assert(std::equal(m.begin(), m.end(), expected, expected + 4)); + } + { + // flat_map(initializer_list); + using M = std::flat_map, std::deque>>; + M m = {{5, 2}, {2, 2}, {2, 2}, {3, 
3}, {1, 1}, {3, 3}}; + assert(std::equal(m.rbegin(), m.rend(), expected, expected + 4)); + } + { + using A = explicit_allocator; + { + // flat_map(initializer_list); + // different comparator + using M = std::flat_map, std::deque>; + M m = {{1, 1}, {2, 2}, {3, 3}}; + assert(m.size() == 1); + assert(m.begin()->first == m.begin()->second); + LIBCPP_ASSERT(*m.begin() == std::make_pair(1, 1)); + assert(m.key_comp().default_constructed_); + } + { + // flat_map(initializer_list, const Allocator&); + using M = std::flat_map, std::deque, std::vector>; + A a; + M m({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, a); + assert(std::equal(m.rbegin(), m.rend(), expected, expected + 4)); + } + } + { + // flat_map(initializer_list, const key_compare&); + using C = test_less; + using M = std::flat_map; + auto m = M({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, C(10)); + assert(std::equal(m.begin(), m.end(), expected, expected + 4)); + assert(m.key_comp() == C(10)); + + // explicit(false) + M m2 = {{{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, C(10)}; + assert(m2 == m); + assert(m2.key_comp() == C(10)); + } + { + // flat_map(initializer_list, const key_compare&); + // Sorting uses the comparator that was passed in + using M = std::flat_map, std::deque>>; + auto m = M({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, std::greater()); + assert(std::equal(m.rbegin(), m.rend(), expected, expected + 4)); + assert(m.key_comp()(2, 1) == true); + } + { + // flat_map(initializer_list il, const key_compare& comp, const Alloc& a); + using A = explicit_allocator; + using M = std::flat_map, std::deque, std::vector>; + A a; + M m({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, {}, a); + assert(std::equal(m.rbegin(), m.rend(), expected, expected + 4)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter.pass.cpp new file mode 100644 
index 000000000000000..7c0c487969943d9 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter.pass.cpp @@ -0,0 +1,154 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// flat_map(InputIterator first, InputIterator last, const key_compare& comp = key_compare()); +// template +// flat_map(InputIterator first, InputIterator last, const Allocator& a); +// template +// flat_map(InputIterator first, InputIterator last, const key_compare& comp, const Allocator& a); + +#include +#include +#include +#include +#include + +#include "min_allocator.h" +#include "test_allocator.h" +#include "test_iterators.h" +#include "test_macros.h" +#include "../../../test_compare.h" + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. 
+ + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + using Iter1 = typename M1::iterator; + using Iter2 = typename M2::iterator; + using Iter3 = typename M3::iterator; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + + using P = std::pair; + P ar[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}}; + P expected[] = {{1, 1}, {2, 4}, {3, 6}}; + { + // flat_map(InputIterator , InputIterator) + // cpp17_input_iterator + using M = std::flat_map; + auto m = M(cpp17_input_iterator(ar), cpp17_input_iterator(ar + 9)); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + + // explicit(false) + M m2 = {cpp17_input_iterator(ar), cpp17_input_iterator(ar + 9)}; + assert(m2 == m); + } + { + // flat_map(InputIterator , InputIterator) + // greater + using M = std::flat_map, std::deque>, std::deque>; + auto m = M(cpp17_input_iterator(ar), cpp17_input_iterator(ar + 9)); + assert((m.keys() == std::deque>{3, 2, 1})); + LIBCPP_ASSERT((m.values() == std::deque{6, 4, 1})); + } + { + // flat_map(InputIterator , InputIterator) + // Test when the operands are of array type (also contiguous iterator type) + using M = std::flat_map, std::vector>>; + auto m = M(ar, ar); + assert(m.empty()); + } + { + // flat_map(InputIterator , InputIterator, const key_compare&) + using C = test_less; + using M = std::flat_map, std::deque>; + auto m = M(ar, ar + 9, C(3)); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + 
LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.key_comp() == C(3)); + + // explicit(false) + M m2 = {ar, ar + 9, C(3)}; + assert(m2 == m); + assert(m2.key_comp() == C(3)); + } + { + // flat_map(InputIterator , InputIterator, const Allocator&) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + auto m = M(ar, ar + 9, A1(5)); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // flat_map(InputIterator , InputIterator, const Allocator&) + // explicit(false) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + M m = {ar, ar + 9, A1(5)}; // implicit ctor + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // flat_map(InputIterator , InputIterator, const key_compare&, const Allocator&) + using C = test_less; + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::deque>; + auto m = M(ar, ar + 9, C(3), A1(5)); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.key_comp() == C(3)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // flat_map(InputIterator , InputIterator, const key_compare&, const Allocator&) + // explicit(false) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::deque, std::vector>; + M m = {ar, ar + 9, {}, A2(5)}; // implicit ctor + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + 
assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp new file mode 100644 index 000000000000000..1ce859f6c737ea4 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp @@ -0,0 +1,65 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// flat_map(InputIterator first, InputIterator last, const key_compare& comp = key_compare()) +// +// libc++ uses stable_sort to ensure that flat_map's behavior matches map's, +// in terms of which duplicate items are kept. +// This tests a conforming extension. 
+ +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + +struct Mod256 { + bool operator()(int x, int y) const { return (x % 256) < (y % 256); } +}; + +int main(int, char**) { + std::mt19937 randomness; + std::pair pairs[200]; + for (auto& pair : pairs) { + pair = {uint16_t(randomness()), uint16_t(randomness())}; + } + + { + std::map m(pairs, pairs + 200); + std::flat_map fm(pairs, pairs + 200); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + { + std::map m(pairs, pairs + 200, std::allocator()); + std::flat_map fm(pairs, pairs + 200, std::allocator()); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + { + std::map m(pairs, pairs + 200, Mod256()); + std::flat_map fm(pairs, pairs + 200, Mod256()); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + { + std::map m(pairs, pairs + 200, Mod256(), std::allocator()); + std::flat_map fm(pairs, pairs + 200, Mod256(), std::allocator()); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move.pass.cpp new file mode 100644 index 000000000000000..955d3156064aae5 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move.pass.cpp @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(flat_map&&); + +#include +#include +#include +#include +#include +#include + +#include "../helpers.h" +#include "test_macros.h" +#include "../../../test_compare.h" +#include "test_allocator.h" +#include "min_allocator.h" + +int main(int, char**) { + { + using C = test_less; + using A = test_allocator; + using M = std::flat_map, std::deque>; + M mo = M({{1, 1}, {2, 2}, {3, 1}}, C(5), A(7)); + M m = std::move(mo); + assert((m == M{{1, 1}, {2, 2}, {3, 1}})); + assert(m.key_comp() == C(5)); + assert(m.keys().get_allocator() == A(7)); + assert(m.values().get_allocator() == A(7)); + + assert(mo.empty()); + assert(mo.key_comp() == C(5)); + assert(mo.keys().get_allocator().get_id() == test_alloc_base::moved_value); + assert(mo.values().get_allocator().get_id() == test_alloc_base::moved_value); + } + { + using C = test_less; + using A = min_allocator; + using M = std::flat_map, std::deque>; + M mo = M({{1, 1}, {2, 2}, {3, 1}}, C(5), A()); + M m = std::move(mo); + assert((m == M{{1, 1}, {2, 2}, {3, 1}})); + assert(m.key_comp() == C(5)); + assert(m.keys().get_allocator() == A()); + assert(m.values().get_allocator() == A()); + + assert(mo.empty()); + assert(mo.key_comp() == C(5)); + assert(m.keys().get_allocator() == A()); + assert(m.values().get_allocator() == A()); + } + { + // A moved-from flat_map maintains its class invariant in the presence of moved-from comparators. 
+ using M = std::flat_map>; + M mo = M({{1, 1}, {2, 2}, {3, 1}}, std::less()); + M m = std::move(mo); + assert(m.size() == 3); + assert(std::is_sorted(m.begin(), m.end(), m.value_comp())); + assert(m.key_comp()(1, 2) == true); + + assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp())); + LIBCPP_ASSERT(m.key_comp()(1, 2) == true); + LIBCPP_ASSERT(mo.empty()); + mo.insert({{1, 1}, {2, 2}, {3, 1}}); // insert has no preconditions + assert(m == mo); + } + { + // moved-from object maintains invariant if one of underlying container does not clear after move + using M = std::flat_map, std::vector, CopyOnlyVector>; + M m1 = M({1, 2, 3}, {1, 2, 3}); + M m2 = std::move(m1); + assert(m2.size() == 3); + check_invariant(m1); + LIBCPP_ASSERT(m1.empty()); + LIBCPP_ASSERT(m1.keys().size() == 0); + LIBCPP_ASSERT(m1.values().size() == 0); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_alloc.pass.cpp new file mode 100644 index 000000000000000..93a397642252005 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_alloc.pass.cpp @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(flat_map&&, const allocator_type&); + +#include +#include +#include +#include +#include +#include + +#include "../helpers.h" +#include "test_macros.h" +#include "../../../test_compare.h" +#include "test_allocator.h" + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. + + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + std::pair expected[] = {{1, 1}, {2, 2}, {3, 1}}; + using C = test_less; + using A = test_allocator; + using M = std::flat_map, std::deque>; + auto mo = M(expected, expected + 3, C(5), A(7)); + auto m = M(std::move(mo), A(3)); + + assert(m.key_comp() == C(5)); + assert(m.size() == 3); + auto [keys, values] = std::move(m).extract(); + assert(keys.get_allocator() == A(3)); + assert(values.get_allocator() == A(3)); + assert(std::ranges::equal(keys, expected | std::views::elements<0>)); + assert(std::ranges::equal(values, expected | std::views::elements<1>)); + + // The original flat_map is moved-from. 
+ assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp())); + assert(mo.empty()); + assert(mo.key_comp() == C(5)); + assert(mo.keys().get_allocator() == A(7)); + assert(mo.values().get_allocator() == A(7)); + } + { + // moved-from object maintains invariant if one of underlying container does not clear after move + using M = std::flat_map, std::vector, CopyOnlyVector>; + M m1 = M({1, 2, 3}, {1, 2, 3}); + M m2(std::move(m1), std::allocator{}); + assert(m2.size() == 3); + check_invariant(m1); + LIBCPP_ASSERT(m1.empty()); + LIBCPP_ASSERT(m1.keys().size() == 0); + LIBCPP_ASSERT(m1.values().size() == 0); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign.pass.cpp new file mode 100644 index 000000000000000..a94c442c695ddb5 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign.pass.cpp @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map& operator=(flat_map&&); + +#include +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "MoveOnly.h" +#include "../../../test_compare.h" +#include "test_allocator.h" +#include "min_allocator.h" + +int main(int, char**) { + { + using C = test_less; + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::vector>; + M mo = M({{1, 1}, {2, 3}, {3, 2}}, C(5), A1(7)); + M m = M({}, C(3), A1(7)); + m = std::move(mo); + assert((m == M{{1, 1}, {2, 3}, {3, 2}})); + assert(m.key_comp() == C(5)); + auto [ks, vs] = std::move(m).extract(); + assert(ks.get_allocator() == A1(7)); + assert(vs.get_allocator() == A2(7)); + assert(mo.empty()); + } + { + using C = test_less; + using A1 = other_allocator; + using A2 = other_allocator; + using M = std::flat_map, std::deque>; + M mo = M({{4, 5}, {5, 4}}, C(5), A1(7)); + M m = M({{1, 1}, {2, 2}, {3, 3}, {4, 4}}, C(3), A1(7)); + m = std::move(mo); + assert((m == M{{4, 5}, {5, 4}})); + assert(m.key_comp() == C(5)); + auto [ks, vs] = std::move(m).extract(); + assert(ks.get_allocator() == A1(7)); + assert(vs.get_allocator() == A2(7)); + assert(mo.empty()); + } + { + using A = min_allocator; + using M = std::flat_map, std::vector, std::vector>; + M mo = M({{5, 1}, {4, 2}, {3, 3}}, A()); + M m = M({{4, 4}, {3, 3}, {2, 2}, {1, 1}}, A()); + m = std::move(mo); + assert((m == M{{5, 1}, {4, 2}, {3, 3}})); + auto [ks, vs] = std::move(m).extract(); + assert(ks.get_allocator() == A()); + assert(vs.get_allocator() == A()); + assert(mo.empty()); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_clears.pass.cpp 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_clears.pass.cpp new file mode 100644 index 000000000000000..f28d52dd4e46332 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_clears.pass.cpp @@ -0,0 +1,104 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map& operator=(flat_map&&); +// Preserves the class invariant for the moved-from flat_map. + +#include +#include +#include +#include +#include +#include +#include + +#include "../helpers.h" +#include "test_macros.h" + +struct MoveNegates { + int value_ = 0; + MoveNegates() = default; + MoveNegates(int v) : value_(v) {} + MoveNegates(MoveNegates&& rhs) : value_(rhs.value_) { rhs.value_ = -rhs.value_; } + MoveNegates& operator=(MoveNegates&& rhs) { + value_ = rhs.value_; + rhs.value_ = -rhs.value_; + return *this; + } + ~MoveNegates() = default; + auto operator<=>(const MoveNegates&) const = default; +}; + +struct MoveClears { + int value_ = 0; + MoveClears() = default; + MoveClears(int v) : value_(v) {} + MoveClears(MoveClears&& rhs) : value_(rhs.value_) { rhs.value_ = 0; } + MoveClears& operator=(MoveClears&& rhs) { + value_ = rhs.value_; + rhs.value_ = 0; + return *this; + } + ~MoveClears() = default; + auto operator<=>(const MoveClears&) const = default; +}; + +int main(int, char**) { + auto value_eq = [](auto&& p, auto&& q) { return p.first == q.first; }; + { + const std::pair expected[] = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}}; + using M = std::flat_map, std::vector>; + M m = M(expected, expected + 8); + M m2 
= M(expected, expected + 3); + + m2 = std::move(m); + + assert(std::equal(m2.begin(), m2.end(), expected, expected + 8)); + LIBCPP_ASSERT(m.empty()); + assert(std::is_sorted(m.begin(), m.end(), m.value_comp())); // still sorted + assert(std::adjacent_find(m.begin(), m.end(), value_eq) == m.end()); // still contains no duplicates + m.insert({1, 1}); + m.insert({2, 2}); + assert(m.contains(1)); + assert(m.find(2) != m.end()); + } + { + const std::pair expected[] = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}}; + using M = std::flat_map, std::vector>; + M m = M(expected, expected + 8); + M m2 = M(expected, expected + 3); + + m2 = std::move(m); + + assert(std::equal(m2.begin(), m2.end(), expected, expected + 8)); + LIBCPP_ASSERT(m.empty()); + assert(std::is_sorted(m.begin(), m.end(), m.value_comp())); // still sorted + assert(std::adjacent_find(m.begin(), m.end(), value_eq) == m.end()); // still contains no duplicates + m.insert({1, 1}); + m.insert({2, 2}); + assert(m.contains(1)); + assert(m.find(2) != m.end()); + } + { + // moved-from object maintains invariant if one of underlying container does not clear after move + using M = std::flat_map, std::vector, CopyOnlyVector>; + M m1 = M({1, 2, 3}, {1, 2, 3}); + M m2 = M({1, 2}, {1, 2}); + m2 = std::move(m1); + assert(m2.size() == 3); + check_invariant(m1); + LIBCPP_ASSERT(m1.empty()); + LIBCPP_ASSERT(m1.keys().size() == 0); + LIBCPP_ASSERT(m1.values().size() == 0); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_noexcept.pass.cpp new file mode 100644 index 000000000000000..665b763e6c4f751 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_noexcept.pass.cpp @@ -0,0 +1,110 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map& operator=(flat_map&& c) +// noexcept( +// is_nothrow_move_assignable::value && +// is_nothrow_move_assignable::value && +// is_nothrow_copy_assignable::value); + +// This tests a conforming extension + +#include +#include +#include +#include +#include + +#include "MoveOnly.h" +#include "test_allocator.h" +#include "test_macros.h" + +struct MoveSensitiveComp { + MoveSensitiveComp() noexcept(false) = default; + MoveSensitiveComp(const MoveSensitiveComp&) noexcept(false) = default; + MoveSensitiveComp(MoveSensitiveComp&& rhs) { rhs.is_moved_from_ = true; } + MoveSensitiveComp& operator=(const MoveSensitiveComp&) noexcept = default; + MoveSensitiveComp& operator=(MoveSensitiveComp&& rhs) { + rhs.is_moved_from_ = true; + return *this; + } + bool operator()(const auto&, const auto&) const { return false; } + bool is_moved_from_ = false; +}; + +struct MoveThrowsComp { + MoveThrowsComp(MoveThrowsComp&&) noexcept(false); + MoveThrowsComp(const MoveThrowsComp&) noexcept(true); + MoveThrowsComp& operator=(MoveThrowsComp&&) noexcept(false); + MoveThrowsComp& operator=(const MoveThrowsComp&) noexcept(true); + bool operator()(const auto&, const auto&) const; +}; + +int main(int, char**) { + { + using C = std::flat_map; + LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v); + } + { + using C = + std::flat_map, + std::vector>, + std::vector>>; + static_assert(!std::is_nothrow_move_assignable_v); + } + { + using C = + std::flat_map, + std::vector>, + std::vector>>; + static_assert(!std::is_nothrow_move_assignable_v); + } + { + using C = + std::flat_map, + std::vector>, + std::vector>>; + LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v); + } + 
{ + using C = + std::flat_map, + std::vector>, + std::vector>>; + LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v); + } + { + // Test with a comparator that throws on move-assignment. + using C = std::flat_map; + LIBCPP_STATIC_ASSERT(!std::is_nothrow_move_assignable_v); + } + { + // Test with a container that throws on move-assignment. + using C = std::flat_map, std::pmr::vector, std::vector>; + static_assert(!std::is_nothrow_move_assignable_v); + } + { + // Test with a container that throws on move-assignment. + using C = std::flat_map, std::vector, std::pmr::vector>; + static_assert(!std::is_nothrow_move_assignable_v); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_exceptions.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_exceptions.pass.cpp new file mode 100644 index 000000000000000..cb7e30c2b74fae2 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_exceptions.pass.cpp @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// UNSUPPORTED: no-exceptions + +// + +// flat_map(flat_map&& s); +// If any member function in [flat.map.defn] exits via an exception, the invariant is restored. + +#include +#include +#include +#include +#include +#include + +#include "../helpers.h" +#include "test_macros.h" + +static int countdown = 0; + +struct EvilContainer : std::vector { + EvilContainer() = default; + EvilContainer(EvilContainer&& rhs) { + // Throw on move-construction. 
+ if (--countdown == 0) { + rhs.insert(rhs.end(), 0); + rhs.insert(rhs.end(), 0); + throw 42; + } + } +}; + +int main(int, char**) { + { + using M = std::flat_map, EvilContainer, std::vector>; + M mo = {{1, 1}, {2, 2}, {3, 3}}; + countdown = 1; + try { + M m = std::move(mo); + assert(false); // not reached + } catch (int x) { + assert(x == 42); + } + // The source flat_map maintains its class invariant. + check_invariant(mo); + LIBCPP_ASSERT(mo.empty()); + } + { + using M = std::flat_map, std::vector, EvilContainer>; + M mo = {{1, 1}, {2, 2}, {3, 3}}; + countdown = 1; + try { + M m = std::move(mo); + assert(false); // not reached + } catch (int x) { + assert(x == 42); + } + // The source flat_map maintains its class invariant. + check_invariant(mo); + LIBCPP_ASSERT(mo.empty()); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_noexcept.pass.cpp new file mode 100644 index 000000000000000..d281dafbcf72dd8 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_noexcept.pass.cpp @@ -0,0 +1,102 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(flat_map&&) +// noexcept(is_nothrow_move_constructible::value && +// is_nothrow_move_constructible::value && +// is_nothrow_copy_constructible::value); + +// This tests a conforming extension + +#include +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "MoveOnly.h" +#include "test_allocator.h" + +template +struct ThrowingMoveAllocator { + using value_type = T; + explicit ThrowingMoveAllocator() = default; + ThrowingMoveAllocator(const ThrowingMoveAllocator&) = default; + ThrowingMoveAllocator(ThrowingMoveAllocator&&) noexcept(false) {} + T* allocate(std::ptrdiff_t n) { return std::allocator().allocate(n); } + void deallocate(T* p, std::ptrdiff_t n) { return std::allocator().deallocate(p, n); } + friend bool operator==(ThrowingMoveAllocator, ThrowingMoveAllocator) = default; +}; + +struct ThrowingMoveComp { + ThrowingMoveComp() = default; + ThrowingMoveComp(const ThrowingMoveComp&) noexcept(true) {} + ThrowingMoveComp(ThrowingMoveComp&&) noexcept(false) {} + bool operator()(const auto&, const auto&) const { return false; } +}; + +struct MoveSensitiveComp { + MoveSensitiveComp() noexcept(false) = default; + MoveSensitiveComp(const MoveSensitiveComp&) noexcept = default; + MoveSensitiveComp(MoveSensitiveComp&& rhs) { rhs.is_moved_from_ = true; } + MoveSensitiveComp& operator=(const MoveSensitiveComp&) noexcept(false) = default; + MoveSensitiveComp& operator=(MoveSensitiveComp&& rhs) { + rhs.is_moved_from_ = true; + return *this; + } + bool operator()(const auto&, const auto&) const { return false; } + bool is_moved_from_ = false; +}; + +int main(int, char**) { + { + using C = std::flat_map; + LIBCPP_STATIC_ASSERT(std::is_nothrow_move_constructible_v); + C c; + C d = std::move(c); + } + { + using C = 
std::flat_map, std::deque>>; + LIBCPP_STATIC_ASSERT(std::is_nothrow_move_constructible_v); + C c; + C d = std::move(c); + } +#if _LIBCPP_VERSION + { + // Container fails to be nothrow-move-constructible; this relies on libc++'s support for non-nothrow-copyable allocators + using C = std::flat_map, std::deque>, std::vector>; + static_assert(!std::is_nothrow_move_constructible_v>>); + static_assert(!std::is_nothrow_move_constructible_v); + C c; + C d = std::move(c); + } + { + // Container fails to be nothrow-move-constructible; this relies on libc++'s support for non-nothrow-copyable allocators + using C = std::flat_map, std::vector, std::deque>>; + static_assert(!std::is_nothrow_move_constructible_v>>); + static_assert(!std::is_nothrow_move_constructible_v); + C c; + C d = std::move(c); + } +#endif // _LIBCPP_VERSION + { + // Comparator fails to be nothrow-move-constructible + using C = std::flat_map; + static_assert(!std::is_nothrow_move_constructible_v); + C c; + C d = std::move(c); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/pmr.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/pmr.pass.cpp new file mode 100644 index 000000000000000..154af11bb9b4db2 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/pmr.pass.cpp @@ -0,0 +1,361 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// UNSUPPORTED: availability-pmr-missing + +// + +// Test various constructors with pmr + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_iterators.h" +#include "test_macros.h" +#include "test_allocator.h" +#include "../../../test_compare.h" + +int main(int, char**) { + { + // flat_map(const Allocator& a); + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::polymorphic_allocator pa = &mr; + auto m1 = M(pa); + assert(m1.empty()); + assert(m1.keys().get_allocator() == pa); + assert(m1.values().get_allocator() == pa); + auto m2 = M(&mr); + assert(m2.empty()); + assert(m2.keys().get_allocator() == pa); + assert(m2.values().get_allocator() == pa); + } + { + // flat_map(const key_compare& comp, const Alloc& a); + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + vm.emplace_back(std::greater()); + assert(vm[0] == M{}); + assert(vm[0].key_comp()(2, 1) == true); + assert(vm[0].value_comp()({2, 0}, {1, 0}) == true); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + // flat_map(const key_container_type& key_cont, const mapped_container_type& mapped_cont, + // const Allocator& a); + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + std::pmr::vector ks = {1, 1, 1, 2, 2, 3, 2, 3, 3}; + std::pmr::vector vs = {1, 1, 1, 2, 2, 3, 2, 3, 3}; + assert(ks.get_allocator().resource() != &mr); + assert(vs.get_allocator().resource() != &mr); + vm.emplace_back(ks, vs); + assert(ks.size() == 9); // ks' value is unchanged, since it 
was an lvalue above + assert(vs.size() == 9); // vs' value is unchanged, since it was an lvalue above + assert((vm[0] == M{{1, 1}, {2, 2}, {3, 3}})); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + // flat_map(const flat_map&, const allocator_type&); + using C = test_less; + using M = std::flat_map, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr1; + std::pmr::monotonic_buffer_resource mr2; + M mo = M({1, 2, 3}, {2, 2, 1}, C(5), &mr1); + M m = {mo, &mr2}; // also test the implicitness of this constructor + + assert(m.key_comp() == C(5)); + assert((m.keys() == std::pmr::vector{1, 2, 3})); + assert((m.values() == std::pmr::vector{2, 2, 1})); + assert(m.keys().get_allocator().resource() == &mr2); + assert(m.values().get_allocator().resource() == &mr2); + + // mo is unchanged + assert(mo.key_comp() == C(5)); + assert((mo.keys() == std::pmr::vector{1, 2, 3})); + assert((mo.values() == std::pmr::vector{2, 2, 1})); + assert(mo.keys().get_allocator().resource() == &mr1); + assert(mo.values().get_allocator().resource() == &mr1); + } + { + // flat_map(const flat_map&, const allocator_type&); + using M = std::flat_map, std::pmr::vector, std::pmr::deque>; + std::pmr::vector vs; + M m = {{1, 2}, {2, 2}, {3, 1}}; + vs.push_back(m); + assert(vs[0] == m); + } + { + // flat_map& operator=(const flat_map& m); + // pmr allocator is not propagated + using M = std::flat_map, std::pmr::deque, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr1; + std::pmr::monotonic_buffer_resource mr2; + M mo = M({{1, 1}, {2, 2}, {3, 3}}, &mr1); + M m = M({{4, 4}, {5, 5}}, &mr2); + m = mo; + assert((m == M{{1, 1}, {2, 2}, {3, 3}})); + assert(m.keys().get_allocator().resource() == &mr2); + assert(m.values().get_allocator().resource() == &mr2); + + // mo is unchanged + assert((mo == M{{1, 1}, {2, 2}, {3, 3}})); + assert(mo.keys().get_allocator().resource() == &mr1); + } + { + // flat_map(const flat_map& 
m); + using C = test_less; + std::pmr::monotonic_buffer_resource mr; + using M = std::flat_map, std::pmr::vector>; + auto mo = M({{1, 1}, {2, 2}, {3, 3}}, C(5), &mr); + auto m = mo; + + assert(m.key_comp() == C(5)); + assert((m == M{{1, 1}, {2, 2}, {3, 3}})); + auto [ks, vs] = std::move(m).extract(); + assert(ks.get_allocator().resource() == std::pmr::get_default_resource()); + assert(vs.get_allocator().resource() == std::pmr::get_default_resource()); + + // mo is unchanged + assert(mo.key_comp() == C(5)); + assert((mo == M{{1, 1}, {2, 2}, {3, 3}})); + auto [kso, vso] = std::move(mo).extract(); + assert(kso.get_allocator().resource() == &mr); + assert(vso.get_allocator().resource() == &mr); + } + { + // flat_map(initializer_list il, const Alloc& a); + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + std::initializer_list il = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}}; + vm.emplace_back(il); + assert((vm[0] == M{{1, 1}, {3, 3}, {4, 4}, {5, 5}})); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + // flat_map(initializer_list il, const key_compare& comp, const Alloc& a); + using C = test_less; + using M = std::flat_map, std::pmr::deque>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + std::initializer_list il = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}}; + vm.emplace_back(il, C(5)); + assert((vm[0] == M{{1, 1}, {3, 3}, {4, 4}, {5, 5}})); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + assert(vm[0].key_comp() == C(5)); + } + { + // flat_map(InputIterator first, InputIterator last, const Allocator& a); + using P = std::pair; + P ar[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}}; + P expected[] = {{1, 1}, {2, 4}, {3, 6}}; + { + // cpp17 iterator + using M = std::flat_map, std::pmr::vector, 
std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + vm.emplace_back(cpp17_input_iterator(ar), cpp17_input_iterator(ar + 9)); + assert(std::ranges::equal(vm[0].keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(vm[0], expected)); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + vm.emplace_back(ar, ar); + assert(vm[0].empty()); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + } + { + // flat_map(flat_map&&, const allocator_type&); + std::pair expected[] = {{1, 1}, {2, 2}, {3, 1}}; + using C = test_less; + using M = std::flat_map, std::pmr::deque>; + std::pmr::monotonic_buffer_resource mr1; + std::pmr::monotonic_buffer_resource mr2; + M mo = M({{1, 1}, {3, 1}, {1, 1}, {2, 2}}, C(5), &mr1); + M m = {std::move(mo), &mr2}; // also test the implicitness of this constructor + + assert(m.key_comp() == C(5)); + assert(m.size() == 3); + assert(m.keys().get_allocator().resource() == &mr2); + assert(m.values().get_allocator().resource() == &mr2); + assert(std::equal(m.begin(), m.end(), expected, expected + 3)); + + // The original flat_map is moved-from. 
+ assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp())); + assert(mo.key_comp() == C(5)); + assert(mo.keys().get_allocator().resource() == &mr1); + assert(mo.values().get_allocator().resource() == &mr1); + } + { + // flat_map(flat_map&&, const allocator_type&); + using M = std::flat_map, std::pmr::deque, std::pmr::vector>; + std::pmr::vector vs; + M m = {{1, 1}, {3, 1}, {1, 1}, {2, 2}}; + vs.push_back(std::move(m)); + assert((vs[0].keys() == std::pmr::deque{1, 2, 3})); + assert((vs[0].values() == std::pmr::vector{1, 2, 1})); + } + { + // flat_map& operator=(flat_map&&); + using M = + std::flat_map, std::pmr::vector, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr1; + std::pmr::monotonic_buffer_resource mr2; + M mo = M({{"short", 1}, + {"very long string that definitely won't fit in the SSO buffer and therefore becomes empty on move", 2}}, + &mr1); + M m = M({{"don't care", 3}}, &mr2); + m = std::move(mo); + assert(m.size() == 2); + assert(std::is_sorted(m.begin(), m.end(), m.value_comp())); + assert(m.begin()->first.get_allocator().resource() == &mr2); + + assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp())); + mo.insert({"foo", 1}); + assert(mo.begin()->first.get_allocator().resource() == &mr1); + } + { + // flat_map(from_range_t, R&&, const Alloc&); + using P = std::pair; + P ar[] = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}}; + P expected[] = {{1, 1}, {2, 4}, {3, 6}}; + { + // input_range + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + using Iter = cpp20_input_iterator; + using Sent = sentinel_wrapper; + using R = std::ranges::subrange; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + vm.emplace_back(std::from_range, R(Iter(ar), Sent(Iter(ar + 9)))); + assert(std::ranges::equal(vm[0].keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(vm[0], expected)); + assert(vm[0].keys().get_allocator().resource() == &mr); + 
assert(vm[0].values().get_allocator().resource() == &mr); + } + { + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + using R = std::ranges::subrange; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + vm.emplace_back(std::from_range, R(ar, ar)); + assert(vm[0].empty()); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + } + { + // flat_map(sorted_unique_t, const key_container_type& key_cont, + // const mapped_container_type& mapped_cont, const Alloc& a); + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + std::pmr::vector ks = {1, 2, 4, 10}; + std::pmr::vector vs = {4, 3, 2, 1}; + vm.emplace_back(std::sorted_unique, ks, vs); + assert(!ks.empty()); // it was an lvalue above + assert(!vs.empty()); // it was an lvalue above + assert((vm[0] == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + // flat_map(sorted_unique_t, const key_container_type& key_cont, + // const mapped_container_type& mapped_cont, const Alloc& a); + using M = std::flat_map, std::pmr::vector, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + std::pmr::vector ks({1, 2, 4, 10}, &mr); + std::pmr::vector vs({4, 3, 2, 1}, &mr); + vm.emplace_back(std::sorted_unique, ks, vs); + assert((vm[0] == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + // flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a); + // cpp_17 + using C = test_less; + using M = std::flat_map, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + using P = std::pair; + P ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + vm.emplace_back( +
std::sorted_unique, cpp17_input_iterator(ar), cpp17_input_iterator(ar + 4), C(3)); + assert((vm[0] == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}})); + assert(vm[0].key_comp() == C(3)); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + // flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a); + using C = test_less; + using M = std::flat_map, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + std::pair ar[1] = {{42, 42}}; + vm.emplace_back(std::sorted_unique, ar, ar, C(4)); + assert(vm[0] == M{}); + assert(vm[0].key_comp() == C(4)); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + // flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a); + // cpp_17 + using C = test_less; + using M = std::flat_map, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + using P = std::pair; + P ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + vm.emplace_back( + std::sorted_unique, cpp17_input_iterator(ar), cpp17_input_iterator(ar + 4), C(3)); + assert((vm[0] == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}})); + assert(vm[0].key_comp() == C(3)); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + { + // flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a); + using C = test_less; + using M = std::flat_map, std::pmr::vector>; + std::pmr::monotonic_buffer_resource mr; + std::pmr::vector vm(&mr); + std::pair ar[1] = {{42, 42}}; + vm.emplace_back(std::sorted_unique, ar, ar, C(4)); + assert(vm[0] == M{}); + assert(vm[0].key_comp() == C(4)); + assert(vm[0].keys().get_allocator().resource() == &mr); + assert(vm[0].values().get_allocator().resource() == &mr); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/range.pass.cpp
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/range.pass.cpp new file mode 100644 index 000000000000000..282cc71f31994fe --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/range.pass.cpp @@ -0,0 +1,227 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template R> +// flat_map(from_range_t, R&&) +// template R> +// flat_map(from_range_t, R&&, const key_compare&) +// template R, class Alloc> +// flat_map(from_range_t, R&&, const Alloc&); +// template R, class Alloc> +// flat_map(from_range_t, R&&, const key_compare&, const Alloc&); + +#include +#include +#include +#include +#include +#include + +#include "min_allocator.h" +#include "test_allocator.h" +#include "test_iterators.h" +#include "test_macros.h" +#include "../../../test_compare.h" + +// test constraint container-compatible-range + +template +using RangeOf = std::ranges::subrange; +using Map = std::flat_map; + +static_assert(std::is_constructible_v>>); +static_assert(std::is_constructible_v>>); +static_assert(!std::is_constructible_v>); +static_assert(!std::is_constructible_v>); + +static_assert(std::is_constructible_v>, std::less>); +static_assert(std::is_constructible_v>, std::less>); +static_assert(!std::is_constructible_v, std::less>); +static_assert(!std::is_constructible_v, std::less>); + +static_assert(std::is_constructible_v>, std::allocator>); +static_assert(std::is_constructible_v>, std::allocator>); +static_assert(!std::is_constructible_v, std::allocator>); +static_assert(!std::is_constructible_v, std::allocator>); + 
+static_assert(std::is_constructible_v>, + std::less, + std::allocator>); +static_assert(std::is_constructible_v>, + std::less, + std::allocator>); +static_assert(!std::is_constructible_v, std::less, std::allocator>); +static_assert(!std::is_constructible_v, std::less, std::allocator>); + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. + + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + // container-compatible-range + using C = test_less; + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::vector>; + using Pair = std::pair; + using PairLike = std::tuple; + using NonPairLike = int; + + static_assert(std::is_constructible_v&>); + static_assert(std::is_constructible_v&>); + static_assert(!std::is_constructible_v&>); + + static_assert(std::is_constructible_v&, const C&>); + static_assert(std::is_constructible_v&, const C&>); + static_assert(!std::is_constructible_v&, const C&>); + + static_assert(std::is_constructible_v&, const A1&>); + static_assert(std::is_constructible_v&, const A1&>); + static_assert(!std::is_constructible_v&, const A1&>); + + static_assert(std::is_constructible_v&, const C&, const A1&>); + static_assert(std::is_constructible_v&, const C&, const A1&>); + static_assert(!std::is_constructible_v&, const C&, const A1&>); + } + + using P = std::pair; + P ar[] = {{1, 
1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}}; + P expected[] = {{1, 1}, {2, 4}, {3, 6}}; + { + // flat_map(from_range_t, R&&) + // input_range && !common + using M = std::flat_map; + using Iter = cpp20_input_iterator; + using Sent = sentinel_wrapper; + using R = std::ranges::subrange; + auto m = M(std::from_range, R(Iter(ar), Sent(Iter(ar + 9)))); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + + // explicit(false) + M m2 = {std::from_range, R(Iter(ar), Sent(Iter(ar + 9)))}; + assert(m2 == m); + } + { + // flat_map(from_range_t, R&&) + // greater + using M = std::flat_map, std::deque>, std::deque>; + using Iter = cpp20_input_iterator; + using Sent = sentinel_wrapper; + using R = std::ranges::subrange; + auto m = M(std::from_range, R(Iter(ar), Sent(Iter(ar + 9)))); + assert((m.keys() == std::deque>{3, 2, 1})); + LIBCPP_ASSERT((m.values() == std::deque{6, 4, 1})); + } + { + // flat_map(from_range_t, R&&) + // contiguous range + using M = std::flat_map; + using R = std::ranges::subrange; + auto m = M(std::from_range, R(ar, ar + 9)); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + } + { + // flat_map(from_range_t, R&&, const key_compare&) + using C = test_less; + using M = std::flat_map, std::deque>; + using R = std::ranges::subrange; + auto m = M(std::from_range, R(ar, ar + 9), C(3)); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.key_comp() == C(3)); + + // explicit(false) + M m2 = {std::from_range, R(ar, ar + 9), C(3)}; + assert(m2 == m); + assert(m2.key_comp() == C(3)); + } + { + // flat_map(from_range_t, R&&, const Allocator&) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + using R = std::ranges::subrange; + auto m = 
M(std::from_range, R(ar, ar + 9), A1(5)); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // flat_map(from_range_t, R&&, const Allocator&) + // explicit(false) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + using R = std::ranges::subrange; + M m = {std::from_range, R(ar, ar + 9), A1(5)}; // implicit ctor + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // flat_map(from_range_t, R&&, const key_compare&, const Allocator&) + using C = test_less; + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::deque>; + using R = std::ranges::subrange; + auto m = M(std::from_range, R(ar, ar + 9), C(3), A1(5)); + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.key_comp() == C(3)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // flat_map(from_range_t, R&&, const key_compare&, const Allocator&) + // explicit(false) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::deque, std::vector>; + using R = std::ranges::subrange; + M m = {std::from_range, R(ar, ar + 9), {}, A2(5)}; // implicit ctor + assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); + LIBCPP_ASSERT(std::ranges::equal(m, expected)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_container.pass.cpp 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_container.pass.cpp new file mode 100644 index 000000000000000..3c8868f2ff4247d --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_container.pass.cpp @@ -0,0 +1,165 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map(sorted_unique_t, key_container_type key_cont, mapped_container_type mapped_cont, +// const key_compare& comp = key_compare()); +// +// template +// flat_map(sorted_unique_t, const key_container_type& key_cont, +// const mapped_container_type& mapped_cont, const Alloc& a); +// template +// flat_map(sorted_unique_t, const key_container_type& key_cont, +// const mapped_container_type& mapped_cont, +// const key_compare& comp, const Alloc& a); + +#include +#include +#include +#include + +#include "min_allocator.h" +#include "MoveOnly.h" +#include "test_allocator.h" +#include "test_iterators.h" +#include "test_macros.h" +#include "../../../test_compare.h" + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. 
+ + using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + // flat_map(sorted_unique_t, key_container_type , mapped_container_type) + using M = std::flat_map; + std::vector ks = {1, 2, 4, 10}; + std::vector vs = {4, 3, 2, 1}; + auto ks2 = ks; + auto vs2 = vs; + + auto m = M(std::sorted_unique, ks, vs); + assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + m = M(std::sorted_unique, std::move(ks), std::move(vs)); + assert(ks.empty()); // it was moved-from + assert(vs.empty()); // it was moved-from + assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + + // explicit(false) + M m2 = {std::sorted_unique, std::move(ks2), std::move(vs2)}; + assert(m == m2); + } + { + // flat_map(sorted_unique_t, key_container_type , mapped_container_type) + // non-default container, comparator and allocator type + using Ks = std::deque>; + using Vs = std::deque>; + using M = std::flat_map, Ks, Vs>; + Ks ks = {10, 4, 2, 1}; + Vs vs = {1, 2, 3, 4}; + auto m = M(std::sorted_unique, ks, vs); + assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + m = M(std::sorted_unique, std::move(ks), std::move(vs)); + assert(ks.empty()); // it was moved-from + assert(vs.empty()); // it was moved-from + assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + } + { + // flat_map(sorted_unique_t, key_container_type , mapped_container_type) + // allocator copied into the containers + using A = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + auto ks = std::vector({1, 2, 4, 10}, A(4)); + 
auto vs = std::deque({4, 3, 2, 1}, A(5)); + auto m = M(std::sorted_unique, std::move(ks), std::move(vs)); + assert(ks.empty()); // it was moved-from + assert(vs.empty()); // it was moved-from + assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + assert(m.keys().get_allocator() == A(4)); + assert(m.values().get_allocator() == A(5)); + } + { + // flat_map(sorted_unique_t, key_container_type , mapped_container_type, key_compare) + using C = test_less; + using M = std::flat_map; + std::vector ks = {1, 2, 4, 10}; + std::vector vs = {4, 3, 2, 1}; + + auto m = M(std::sorted_unique, ks, vs, C(4)); + assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + assert(m.key_comp() == C(4)); + + // explicit(false) + M m2 = {std::sorted_unique, ks, vs, C(4)}; + assert(m2 == m); + assert(m2.key_comp() == C(4)); + } + { + // flat_map(sorted_unique_t, key_container_type , mapped_container_type, key_compare, const Allocator&) + using C = test_less; + using A = test_allocator; + using M = std::flat_map, std::vector>; + std::vector ks = {1, 2, 4, 10}; + std::vector vs = {4, 3, 2, 1}; + auto m = M(std::sorted_unique, ks, vs, C(4), A(5)); + assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}})); + assert(m.key_comp() == C(4)); + assert(m.keys().get_allocator() == A(5)); + assert(m.values().get_allocator() == A(5)); + + // explicit(false): copy-list-initialization must select the sorted_unique_t overload under test + M m2 = {std::sorted_unique, ks, vs, C(4), A(5)}; + assert(m2 == m); + assert(m2.key_comp() == C(4)); + assert(m2.keys().get_allocator() == A(5)); + assert(m2.values().get_allocator() == A(5)); + } + { + // flat_map(sorted_unique_t, key_container_type , mapped_container_type, const Allocator&) + using A = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + auto ks = std::vector({1, 2, 4, 10}, A(4)); + auto vs = std::deque({4, 3, 2, 1}, A(5)); + auto m = M(std::sorted_unique, ks, vs, A(6)); // replaces the allocators + assert(!ks.empty()); // it was an lvalue above + assert(!vs.empty()); // it was an lvalue above + assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10,
1}})); + assert(m.keys().get_allocator() == A(6)); + assert(m.values().get_allocator() == A(6)); + + // explicit(false) + M m2 = {std::sorted_unique, ks, vs, A(6)}; + assert(m2 == m); + assert(m2.keys().get_allocator() == A(6)); + assert(m2.values().get_allocator() == A(6)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_initializer_list.pass.cpp new file mode 100644 index 000000000000000..26452472ba20112 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_initializer_list.pass.cpp @@ -0,0 +1,179 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// flat_map(sorted_unique_t s, initializer_list il, +// const key_compare& comp = key_compare()) +// template +// flat_map(sorted_unique_t, initializer_list il, const Alloc& a); +// template +// flat_map(sorted_unique_t, initializer_list il, +// const key_compare& comp, const Alloc& a); + +#include +#include +#include +#include + +#include "min_allocator.h" +#include "test_allocator.h" +#include "test_iterators.h" +#include "test_macros.h" +#include "../../../test_compare.h" + +template +std::initializer_list> il = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + +const auto il1 = il; +const auto il2 = il; +const auto il3 = il; + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true. 
+ using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + using IL = std::initializer_list>; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + // initializer_list needs to match exactly + using M = std::flat_map; + using C = typename M::key_compare; + static_assert(std::is_constructible_v>>); + static_assert(std::is_constructible_v>, C>); + static_assert(std::is_constructible_v>, + C, + std::allocator>); + static_assert(std::is_constructible_v>, + std::allocator>); + static_assert( + !std::is_constructible_v>>); + static_assert( + !std::is_constructible_v>, C>); + static_assert(!std::is_constructible_v>, + C, + std::allocator>); + static_assert(!std::is_constructible_v>, + std::allocator>); + static_assert( + !std::is_constructible_v>>); + static_assert( + !std::is_constructible_v>, C>); + static_assert(!std::is_constructible_v>, + C, + std::allocator>); + static_assert(!std::is_constructible_v>, + std::allocator>); + } + + { + // flat_map(sorted_unique_t, initializer_list); + using M = std::flat_map; + auto m = M(std::sorted_unique, il1); + auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + assert(m == expected); + + // explicit(false) + M m2 = {std::sorted_unique, il1}; + assert(m2 == m); + } + { + // flat_map(sorted_unique_t, initializer_list, const key_compare&); + using M = std::flat_map>; + auto m = M(std::sorted_unique, il1, std::less()); + assert(m == M({{1, 1}, {2, 2}, {4, 4}, {5, 5}}, std::less<>())); + assert(m.key_comp()(1, 2) == true); + + // explicit(false) + M m2 = {std::sorted_unique, il1, 
std::less()}; + assert(m2 == m); + } + { + // flat_map(sorted_unique_t, initializer_list, const key_compare&); + // greater + using M = std::flat_map, std::deque>, std::vector>; + std::initializer_list> il4{{5, 5}, {4, 4}, {2, 2}, {1, 1}}; + auto m = M(std::sorted_unique, il4, std::greater()); + assert((m == M{{5, 5}, {4, 4}, {2, 2}, {1, 1}})); + } + { + // flat_map(sorted_unique_t, initializer_list, const Allocator&) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + auto m = M(std::sorted_unique, il2, A1(5)); + auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + assert(m == expected); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + + // explicit(false) + M m2 = {std::sorted_unique, il2, A1(5)}; + assert(m2 == m); + assert(m2.keys().get_allocator() == A1(5)); + assert(m2.values().get_allocator() == A2(5)); + } + { + // flat_map(sorted_unique_t, initializer_list, const key_compare&, const Allocator&); + using C = test_less; + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::deque>; + auto m = M(std::sorted_unique, il2, C(3), A1(5)); + assert((m == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}})); + assert(m.key_comp() == C(3)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + { + // flat_map(sorted_unique_t, initializer_list, const key_compare&, const Allocator&); + // explicit(false) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::deque, std::vector>; + M m = {std::sorted_unique, il3, {}, A1(5)}; // implicit ctor + assert((m == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}})); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_iter_iter.pass.cpp 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_iter_iter.pass.cpp new file mode 100644 index 000000000000000..8eb7547e917cca0 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_iter_iter.pass.cpp @@ -0,0 +1,171 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp = key_compare()); +// template +// flat_map(sorted_unique_t, InputIterator first, InputIterator last, const Alloc& a); +// template +// flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp, const Allocator& a); + +#include +#include +#include +#include + +#include "min_allocator.h" +#include "test_allocator.h" +#include "test_iterators.h" +#include "test_macros.h" +#include "../../../test_compare.h" + +int main(int, char**) { + { + // The constructors in this subclause shall not participate in overload + // resolution unless uses_allocator_v is true + // and uses_allocator_v is true.
+ using C = test_less; + using A1 = test_allocator; + using A2 = other_allocator; + using V1 = std::vector; + using V2 = std::vector; + using M1 = std::flat_map; + using M2 = std::flat_map; + using M3 = std::flat_map; + using Iter1 = typename M1::iterator; + using Iter2 = typename M2::iterator; + using Iter3 = typename M3::iterator; + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + + static_assert(std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + static_assert(!std::is_constructible_v); + } + { + // flat_map(sorted_unique_t, InputIterator, InputIterator); + // cpp17_input_iterator + using M = std::flat_map; + using P = std::pair; + P ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + auto m = M(std::sorted_unique, cpp17_input_iterator(ar), cpp17_input_iterator(ar + 4)); + auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + assert(m == expected); + + // explicit(false) + M m2 = {std::sorted_unique, cpp17_input_iterator(ar), cpp17_input_iterator(ar + 4)}; + assert(m2 == m); + } + { + // flat_map(sorted_unique_t, InputIterator, InputIterator); + // contiguous iterator + using C = test_less; + using M = std::flat_map>, std::vector>>; + std::pair ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + auto m = M(std::sorted_unique, ar, ar + 4); + auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + assert(m == expected); + } + { + // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&); + // cpp_17_input_iterator + using M = std::flat_map>; + using P = std::pair; + P ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + auto m = M(std::sorted_unique, + cpp17_input_iterator(ar), + cpp17_input_iterator(ar + 4), + std::less()); + assert(m == M({{1, 1}, {2, 2}, {4, 4}, {5, 5}}, std::less<>())); + assert(m.key_comp()(1, 2) == true); + + // explicit(false) + M m2 = {std::sorted_unique, + 
cpp17_input_iterator(ar), + cpp17_input_iterator(ar + 4), + std::less()}; + assert(m2 == m); + } + { + // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&); + // greater + using M = std::flat_map, std::deque>, std::vector>; + using P = std::pair; + P ar[] = {{5, 5}, {4, 4}, {2, 2}, {1, 1}}; + auto m = M(std::sorted_unique, + cpp17_input_iterator(ar), + cpp17_input_iterator(ar + 4), + std::greater()); + assert((m == M{{5, 5}, {4, 4}, {2, 2}, {1, 1}})); + } + { + // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&); + // contiguous iterator + using C = test_less; + using M = std::flat_map>, std::vector>>; + std::pair ar[1] = {{42, 42}}; + auto m = M(std::sorted_unique, ar, ar, C(5)); + assert(m.empty()); + assert(m.key_comp() == C(5)); + } + { + // flat_map(sorted_unique_t, InputIterator , InputIterator, const Allocator&) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::vector, std::deque>; + using P = std::pair; + P ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + auto m = M(std::sorted_unique, ar, ar + 4, A1(5)); + auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + assert(m == expected); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + + // explicit(false) + M m2 = {std::sorted_unique, ar, ar + 4, A1(5)}; + assert(m2 == m); + assert(m2.keys().get_allocator() == A1(5)); + assert(m2.values().get_allocator() == A2(5)); + } + { + // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&, const Allocator&); + using C = test_less; + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::deque>; + using P = std::pair; + P ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + auto m = M(std::sorted_unique, ar, ar + 4, C(3), A1(5)); + assert((m == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}})); + assert(m.key_comp() == C(3)); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == 
A2(5)); + } + { + // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&, const Allocator&); + // explicit(false) + using A1 = test_allocator; + using A2 = test_allocator; + using M = std::flat_map, std::deque, std::vector>; + using P = std::pair; + P ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + M m = {std::sorted_unique, ar, ar + 4, {}, A1(5)}; // implicit ctor + assert((m == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}})); + assert(m.keys().get_allocator() == A1(5)); + assert(m.values().get_allocator() == A2(5)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if.pass.cpp new file mode 100644 index 000000000000000..fb0563eec5376b8 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if.pass.cpp @@ -0,0 +1,93 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// typename flat_map::size_type +// erase_if(flat_map& c, Predicate pred); + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "test_allocator.h" +#include "min_allocator.h" + +// Verify that `flat_map` (like `map`) does NOT support std::erase. 
+// +template +concept HasStdErase = requires(S& s, typename S::value_type x) { std::erase(s, x); }; +static_assert(HasStdErase>); +static_assert(!HasStdErase>); + +template +M make(std::initializer_list vals) { + M ret; + for (int v : vals) + ret[static_cast(v)] = static_cast(v + 10); + return ret; +} + +template +void test0( + std::initializer_list vals, Pred p, std::initializer_list expected, std::size_t expected_erased_count) { + M s = make(vals); + ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); + assert(expected_erased_count == std::erase_if(s, p)); + assert(s == make(expected)); +} + +template +void test() { + // Test all the plausible signatures for this predicate. + auto is1 = [](typename S::const_reference v) { return v.first == 1; }; + auto is2 = [](typename S::value_type v) { return v.first == 2; }; + auto is3 = [](const typename S::value_type& v) { return v.first == 3; }; + auto is4 = [](auto v) { return v.first == 4; }; + auto True = [](const auto&) { return true; }; + auto False = [](auto&&) { return false; }; + + test0({}, is1, {}, 0); + + test0({1}, is1, {}, 1); + test0({1}, is2, {1}, 0); + + test0({1, 2}, is1, {2}, 1); + test0({1, 2}, is2, {1}, 1); + test0({1, 2}, is3, {1, 2}, 0); + + test0({1, 2, 3}, is1, {2, 3}, 1); + test0({1, 2, 3}, is2, {1, 3}, 1); + test0({1, 2, 3}, is3, {1, 2}, 1); + test0({1, 2, 3}, is4, {1, 2, 3}, 0); + + test0({1, 2, 3}, True, {}, 3); + test0({1, 2, 3}, False, {1, 2, 3}, 0); +} + +int main(int, char**) { + test>(); + test, + std::vector>, + std::vector>>>(); + test, std::vector>>>(); + test, std::deque>>>(); + test, std::deque>>>(); + test>(); + test>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if_exceptions.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if_exceptions.pass.cpp new file mode 100644 index 000000000000000..48fdec42db3fcba --- /dev/null +++ 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if_exceptions.pass.cpp @@ -0,0 +1,155 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// UNSUPPORTED: no-exceptions + +// + +// template +// typename flat_map::size_type +// erase_if(flat_map& c, Predicate pred); +// If any member function in [flat.set.defn] exits via an exception, the invariant is restored. +// (This is not a member function, but let's respect the invariant anyway.) + +#include +#include +#include +#include +#include +#include +#include + +#include "../helpers.h" +#include "test_macros.h" + +struct Counter { + int c1, c2, throws; + void tick() { + c1 -= 1; + if (c1 == 0) { + c1 = c2; + throws += 1; + throw 42; + } + } +}; +Counter g_counter = {0, 0, 0}; + +struct ThrowingAssignment { + ThrowingAssignment(int i) : i_(i) {} + ThrowingAssignment(const ThrowingAssignment&) = default; + ThrowingAssignment& operator=(const ThrowingAssignment& rhs) { + g_counter.tick(); + i_ = rhs.i_; + g_counter.tick(); + return *this; + } + operator int() const { return i_; } + int i_; +}; + +struct ThrowingComparator { + bool operator()(const ThrowingAssignment& a, const ThrowingAssignment& b) const { + g_counter.tick(); + return a.i_ < b.i_; + } +}; + +struct ErasurePredicate { + bool operator()(const auto& x) const { return (3 <= x.first && x.first <= 5); } +}; + +int main(int, char**) { + const std::pair expected[] = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}}; + { + using M = std::flat_map; + for (int first_throw = 1; first_throw < 99; ++first_throw) { + for (int second_throw = 1; 
second_throw < 99; ++second_throw) { + g_counter = {0, 0, 0}; + M m = M({1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8}); + try { + g_counter = {first_throw, second_throw, 0}; + auto n = std::erase_if(m, ErasurePredicate()); + assert(n == 3); + // If it didn't throw at all, we're done. + g_counter = {0, 0, 0}; + assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}})); + first_throw = 99; // "done" + break; + } catch (int ex) { + assert(ex == 42); + check_invariant(m); + LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8)); + if (g_counter.throws == 1) { + // We reached the first throw but not the second throw. + break; + } + } + } + } + } + { + using M = std::flat_map; + for (int first_throw = 1; first_throw < 99; ++first_throw) { + for (int second_throw = 1; second_throw < 99; ++second_throw) { + g_counter = {0, 0, 0}; + M m = M({1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8}); + try { + g_counter = {first_throw, second_throw, 0}; + auto n = std::erase_if(m, ErasurePredicate()); + assert(n == 3); + // If it didn't throw at all, we're done. + g_counter = {0, 0, 0}; + assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}})); + first_throw = 99; // "done" + break; + } catch (int ex) { + assert(ex == 42); + check_invariant(m); + LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8)); + if (g_counter.throws == 1) { + // We reached the first throw but not the second throw. 
+ break; + } + } + } + } + } + { + using M = + std::flat_map, std::deque>; + for (int first_throw = 1; first_throw < 99; ++first_throw) { + for (int second_throw = 1; second_throw < 99; ++second_throw) { + g_counter = {0, 0, 0}; + std::deque container = {5, 6, 7, 8}; + container.insert(container.begin(), {1, 2, 3, 4}); + M m = M(std::move(container), {1, 2, 3, 4, 5, 6, 7, 8}); + try { + g_counter = {first_throw, second_throw, 0}; + auto n = std::erase_if(m, ErasurePredicate()); + assert(n == 3); + // If it didn't throw at all, we're done. + g_counter = {0, 0, 0}; + assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}})); + first_throw = 99; // "done" + break; + } catch (int ex) { + assert(ex == 42); + check_invariant(m); + LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8)); + if (g_counter.throws == 1) { + // We reached the first throw but not the second throw. + break; + } + } + } + } + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator.pass.cpp new file mode 100644 index 000000000000000..b63ce6b19ee165b --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator.pass.cpp @@ -0,0 +1,96 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// iterator begin() noexcept; +// const_iterator begin() const noexcept +// iterator end() noexcept; +// const_iterator end() const noexcept; +// +// const_iterator cbegin() const noexcept; +// const_iterator cend() const noexcept; + +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + M m = {{1, 'a'}, {2, 'b'}, {3, 'c'}, {4, 'd'}}; + const M& cm = m; + ASSERT_SAME_TYPE(decltype(m.begin()), typename M::iterator); + ASSERT_SAME_TYPE(decltype(m.cbegin()), typename M::const_iterator); + ASSERT_SAME_TYPE(decltype(cm.begin()), typename M::const_iterator); + ASSERT_SAME_TYPE(decltype(m.end()), typename M::iterator); + ASSERT_SAME_TYPE(decltype(m.cend()), typename M::const_iterator); + ASSERT_SAME_TYPE(decltype(cm.end()), typename M::const_iterator); + static_assert(noexcept(m.begin())); + static_assert(noexcept(cm.begin())); + static_assert(noexcept(m.cbegin())); + static_assert(noexcept(m.end())); + static_assert(noexcept(cm.end())); + static_assert(noexcept(m.cend())); + assert(m.size() == 4); + assert(std::distance(m.begin(), m.end()) == 4); + assert(std::distance(cm.begin(), cm.end()) == 4); + assert(std::distance(m.cbegin(), m.cend()) == 4); + typename M::iterator i; // default-construct + i = m.begin(); // move-assignment + typename M::const_iterator k = i; // converting constructor + assert(i == k); // comparison + for (int j = 1; j <= 4; ++j, ++i) { // pre-increment + assert(i->first == j); // operator-> + assert(i->second == 'a' + j - 1); + } + assert(i == m.end()); + for 
(int j = 4; j >= 1; --j) { + --i; // pre-decrement + assert((*i).first == j); + assert((*i).second == 'a' + j - 1); + } + assert(i == m.begin()); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + // N3644 testing + using C = std::flat_map; + C::iterator ii1{}, ii2{}; + C::iterator ii4 = ii1; + C::const_iterator cii{}; + assert(ii1 == ii2); + assert(ii1 == ii4); + assert(!(ii1 != ii2)); + + assert((ii1 == cii)); + assert((cii == ii1)); + assert(!(ii1 != cii)); + assert(!(cii != ii1)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_comparison.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_comparison.pass.cpp new file mode 100644 index 000000000000000..1975d0ed86cc8b8 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_comparison.pass.cpp @@ -0,0 +1,155 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// flat_map iterators should be C++20 random access iterators + +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using KI = typename KeyContainer::iterator; + using I = M::iterator; + using CI = M::const_iterator; + using RI = M::reverse_iterator; + using CRI = M::const_reverse_iterator; + + static_assert(std::equality_comparable); + static_assert(std::equality_comparable); + static_assert(std::equality_comparable); + static_assert(std::equality_comparable); + + static_assert(std::totally_ordered); + static_assert(std::totally_ordered); + static_assert(std::totally_ordered); + static_assert(std::totally_ordered); + + M m = {{1, 'a'}, {2, 'b'}, {3, 'c'}, {4, 'd'}}; + + I i1 = m.begin(); + I i2 = m.begin() + 1; + + assert(i1 == i1); + assert(!(i1 != i1)); + assert(i1 != i2); + assert(!(i1 == i2)); + assert(i1 < i2); + assert(!(i1 < i1)); + assert(i1 <= i1); + assert(i1 <= i2); + assert(!(i2 <= i1)); + assert(i2 > i1); + assert(!(i2 > i2)); + assert(i2 >= i1); + assert(i2 >= i2); + assert(!(i1 >= i2)); + + CI ci1 = m.cbegin(); + CI ci2 = m.cbegin() + 1; + assert(ci1 == ci1); + assert(!(ci1 != ci1)); + assert(ci1 != ci2); + assert(!(ci1 == ci2)); + assert(ci1 < ci2); + assert(!(ci1 < ci1)); + assert(ci1 <= ci1); + assert(ci1 <= ci2); + assert(!(ci2 <= ci1)); + assert(ci2 > ci1); + assert(!(ci2 > ci2)); + assert(ci2 >= ci1); + assert(ci2 >= ci2); + assert(!(ci1 >= ci2)); + + RI ri1 = m.rbegin(); + RI ri2 = m.rbegin() + 1; + assert(ri1 == ri1); + assert(!(ri1 != ri1)); + assert(ri1 
!= ri2); + assert(!(ri1 == ri2)); + assert(ri1 < ri2); + assert(!(ri1 < ri1)); + assert(ri1 <= ri1); + assert(ri1 <= ri2); + assert(!(ri2 <= ri1)); + assert(ri2 > ri1); + assert(!(ri2 > ri2)); + assert(ri2 >= ri1); + assert(ri2 >= ri2); + assert(!(ri1 >= ri2)); + + CRI cri1 = m.crbegin(); + CRI cri2 = m.crbegin() + 1; + assert(cri1 == cri1); + assert(!(cri1 != cri1)); + assert(cri1 != cri2); + assert(!(cri1 == cri2)); + assert(cri1 < cri2); + assert(!(cri1 < cri1)); + assert(cri1 <= cri1); + assert(cri1 <= cri2); + assert(!(cri2 <= cri1)); + assert(cri2 > cri1); + assert(!(cri2 > cri2)); + assert(cri2 >= cri1); + assert(cri2 >= cri2); + assert(!(cri1 >= cri2)); + + if constexpr (std::three_way_comparable) { + static_assert(std::three_way_comparable); // ...of course the wrapped iterators still support <=>. + static_assert(std::three_way_comparable); + static_assert(std::three_way_comparable); + static_assert(std::three_way_comparable); + static_assert(std::same_as I()), std::strong_ordering>); + static_assert(std::same_as CI()), std::strong_ordering>); + static_assert(std::same_as CI()), std::strong_ordering>); + static_assert(std::same_as RI()), std::strong_ordering>); + static_assert(std::same_as CRI()), std::strong_ordering>); + static_assert(std::same_as CRI()), std::strong_ordering>); + + assert(i1 <=> i1 == std::strong_ordering::equivalent); + assert(i1 <=> i2 == std::strong_ordering::less); + assert(i2 <=> i1 == std::strong_ordering::greater); + + assert(ci1 <=> ci1 == std::strong_ordering::equivalent); + assert(ci1 <=> ci2 == std::strong_ordering::less); + assert(ci2 <=> ci1 == std::strong_ordering::greater); + + assert(ri1 <=> ri1 == std::strong_ordering::equivalent); + assert(ri1 <=> ri2 == std::strong_ordering::less); + assert(ri2 <=> ri1 == std::strong_ordering::greater); + + assert(cri1 <=> cri1 == std::strong_ordering::equivalent); + assert(cri1 <=> cri2 == std::strong_ordering::less); + assert(cri2 <=> cri1 == std::strong_ordering::greater); + } +} + 
+int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_concept_conformance.compile.pass.cpp new file mode 100644 index 000000000000000..28814e2e37e3c11 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_concept_conformance.compile.pass.cpp @@ -0,0 +1,82 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// iterator, const_iterator, reverse_iterator, const_reverse_iterator + +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using C = std::flat_map, KeyContainer, ValueContainer>; + using I = C::iterator; + using CI = C::const_iterator; + using RI = C::reverse_iterator; + using CRI = C::const_reverse_iterator; + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(!std::contiguous_iterator); + static_assert(!std::contiguous_iterator); + static_assert(!std::contiguous_iterator); + static_assert(!std::contiguous_iterator); + 
static_assert(!std::indirectly_writable>); + static_assert(!std::indirectly_writable>); + static_assert(!std::indirectly_writable>); + static_assert(!std::indirectly_writable>); + static_assert(std::sentinel_for); + static_assert(std::sentinel_for); + static_assert(!std::sentinel_for); + static_assert(!std::sentinel_for); + static_assert(std::sentinel_for); + static_assert(std::sentinel_for); + static_assert(!std::sentinel_for); + static_assert(!std::sentinel_for); + static_assert(!std::sentinel_for); + static_assert(!std::sentinel_for); + static_assert(std::sentinel_for); + static_assert(std::sentinel_for); + static_assert(!std::sentinel_for); + static_assert(!std::sentinel_for); + static_assert(std::sentinel_for); + static_assert(std::sentinel_for); + static_assert(std::indirectly_movable_storable*>); + static_assert(std::indirectly_movable_storable*>); + static_assert(std::indirectly_movable_storable*>); + static_assert(std::indirectly_movable_storable*>); + +#ifdef _LIBCPP_VERSION + static_assert(std::is_same_v::iterator_category, std::random_access_iterator_tag>); + static_assert(std::is_same_v::iterator_category, std::random_access_iterator_tag>); + static_assert(std::is_same_v::iterator_category, std::random_access_iterator_tag>); + static_assert(std::is_same_v::iterator_category, std::random_access_iterator_tag>); +#endif +} + +void test() { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/range_concept_conformance.compile.pass.cpp new file mode 100644 index 000000000000000..abbad310f49caf5 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/range_concept_conformance.compile.pass.cpp @@ -0,0 +1,55 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +#include +#include +#include +#include +#include +#include +#include +#include "MinSequenceContainer.h" +#include "min_allocator.h" + +template +void test() { + { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using C = std::flat_map, KeyContainer, ValueContainer>; + + static_assert(std::same_as, typename C::iterator>); + static_assert(std::ranges::random_access_range); + static_assert(!std::ranges::contiguous_range); + static_assert(std::ranges::common_range); + static_assert(std::ranges::input_range); + static_assert(!std::ranges::view); + static_assert(std::ranges::sized_range); + static_assert(!std::ranges::borrowed_range); + static_assert(std::ranges::viewable_range); + + static_assert(std::same_as, typename C::const_iterator>); + static_assert(std::ranges::random_access_range); + static_assert(!std::ranges::contiguous_range); + static_assert(std::ranges::common_range); + static_assert(std::ranges::input_range); + static_assert(!std::ranges::view); + static_assert(std::ranges::sized_range); + static_assert(!std::ranges::borrowed_range); + static_assert(!std::ranges::viewable_range); + } +} + +void test() { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp new file mode 100644 index 
000000000000000..09e18986a7e813c --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp @@ -0,0 +1,90 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// reverse_iterator rbegin() noexcept; +// const_reverse_iterator rbegin() const noexcept; +// reverse_iterator rend() noexcept; +// const_reverse_iterator rend() const noexcept; +// +// const_reverse_iterator crbegin() const noexcept; +// const_reverse_iterator crend() const noexcept; + +#include +#include +#include +#include +#include +#include + +#include + +#include "test_macros.h" +#include + +int main(int, char**) { + { + using M = std::flat_map, std::deque, std::deque>; + M m = {{1, 'a'}, {2, 'b'}, {3, 'c'}, {4, 'd'}}; + const M& cm = m; + ASSERT_SAME_TYPE(decltype(m.rbegin()), M::reverse_iterator); + ASSERT_SAME_TYPE(decltype(m.crbegin()), M::const_reverse_iterator); + ASSERT_SAME_TYPE(decltype(cm.rbegin()), M::const_reverse_iterator); + ASSERT_SAME_TYPE(decltype(m.rend()), M::reverse_iterator); + ASSERT_SAME_TYPE(decltype(m.crend()), M::const_reverse_iterator); + ASSERT_SAME_TYPE(decltype(cm.rend()), M::const_reverse_iterator); + static_assert(noexcept(m.rbegin())); + static_assert(noexcept(cm.rbegin())); + static_assert(noexcept(m.crbegin())); + static_assert(noexcept(m.rend())); + static_assert(noexcept(cm.rend())); + static_assert(noexcept(m.crend())); + assert(m.size() == 4); + assert(std::distance(m.rbegin(), m.rend()) == 4); + assert(std::distance(cm.rbegin(), cm.rend()) == 4); + assert(std::distance(m.crbegin(), m.crend()) == 4); + 
assert(std::distance(cm.crbegin(), cm.crend()) == 4); + M::reverse_iterator i; // default-construct + ASSERT_SAME_TYPE(decltype(i->first), const int&); + ASSERT_SAME_TYPE(decltype(i->second), char&); + i = m.rbegin(); // move-assignment + M::const_reverse_iterator k = i; // converting constructor + assert(i == k); // comparison + for (int j = 4; j >= 1; --j, ++i) { // pre-increment + assert(i->first == j); // operator-> + assert(i->second == 'a' + j - 1); + } + assert(i == m.rend()); + for (int j = 1; j <= 4; ++j) { + --i; // pre-decrement + assert((*i).first == j); + assert((*i).second == 'a' + j - 1); + } + assert(i == m.rbegin()); + } + { + // N3644 testing + using C = std::flat_map; + C::reverse_iterator ii1{}, ii2{}; + C::reverse_iterator ii4 = ii1; + C::const_reverse_iterator cii{}; + assert(ii1 == ii2); + assert(ii1 == ii4); + assert(!(ii1 != ii2)); + + assert((ii1 == cii)); + assert((cii == ii1)); + assert(!(ii1 != cii)); + assert(!(cii != ii1)); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/clear.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/clear.pass.cpp new file mode 100644 index 000000000000000..30271eb55660bf3 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/clear.pass.cpp @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// class flat_map + +// void clear() noexcept; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// test noexcept + +template +concept NoExceptClear = requires(T t) { + { t.clear() } noexcept; +}; + +static_assert(NoExceptClear>); +#ifndef TEST_HAS_NO_EXCEPTIONS +static_assert( + NoExceptClear, ThrowOnMoveContainer, ThrowOnMoveContainer>>); +#endif + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + M m = {{1, 2}, {2, 1}, {3, 3}, {4, 1}, {5, 0}}; + assert(m.size() == 5); + ASSERT_NOEXCEPT(m.clear()); + ASSERT_SAME_TYPE(decltype(m.clear()), void); + m.clear(); + assert(m.size() == 0); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace.pass.cpp new file mode 100644 index 000000000000000..06631ac689f75db --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace.pass.cpp @@ -0,0 +1,103 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// pair emplace(Args&&... args); + +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "../../../Emplaceable.h" +#include "DefaultOnly.h" +#include "min_allocator.h" + +// Constraints: is_constructible_v, Args...> is true. +template +concept CanEmplace = requires(M m, Args&&... args) { m.emplace(std::forward(args)...); }; + +using Map = std::flat_map; +static_assert(CanEmplace); +static_assert(CanEmplace); +static_assert(CanEmplace, std::tuple>); +static_assert(!CanEmplace); +static_assert(!CanEmplace); + +template +void test_simple() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using R = std::pair; + M m; + ASSERT_SAME_TYPE(decltype(m.emplace()), R); + R r = m.emplace(typename M::value_type(2, 3.5)); + assert(r.second); + assert(r.first == m.begin()); + assert(m.size() == 1); + assert(m.begin()->first == 2); + assert(m.begin()->second == 3.5); +} + +template +void test_emplaceable() { + using M = std::flat_map, KeyContainer, ValueContainer>; + using R = std::pair; + + M m; + ASSERT_SAME_TYPE(decltype(m.emplace()), R); + R r = m.emplace(std::piecewise_construct, std::forward_as_tuple(2), std::forward_as_tuple()); + assert(r.second); + assert(r.first == m.begin()); + assert(m.size() == 1); + assert(m.begin()->first == 2); + assert(m.begin()->second == Emplaceable()); + r = m.emplace(std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5)); + assert(r.second); + assert(r.first == m.begin()); + assert(m.size() == 2); + assert(m.begin()->first == 1); + assert(m.begin()->second == Emplaceable(2, 3.5)); + r 
= m.emplace(std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5)); + assert(!r.second); + assert(r.first == m.begin()); + assert(m.size() == 2); + assert(m.begin()->first == 1); + assert(m.begin()->second == Emplaceable(2, 3.5)); +} + +int main(int, char**) { + test_simple, std::vector>(); + test_simple, std::vector>(); + test_simple, MinSequenceContainer>(); + test_simple>, std::vector>>(); + + test_emplaceable, std::vector>(); + test_emplaceable, std::vector>(); + test_emplaceable, MinSequenceContainer>(); + test_emplaceable>, std::vector>>(); + + { + auto emplace_func = [](auto& m, auto key_arg, auto value_arg) { + m.emplace(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg)); + }; + test_emplace_exception_guarantee(emplace_func); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace_hint.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace_hint.pass.cpp new file mode 100644 index 000000000000000..cfee6cac5806cc1 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace_hint.pass.cpp @@ -0,0 +1,102 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// iterator emplace_hint(const_iterator position, Args&&... 
args); + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "../../../Emplaceable.h" +#include "DefaultOnly.h" +#include "min_allocator.h" +#include "../helpers.h" + +#if defined(_LIBCPP_VERSION) +// spec only specifies `emplace(Args&&...)` is_constructible_v, Args...> is true. +// nothing mentioned for emplace_hint +template +concept CanEmplaceHint = + requires(M m, typename M::const_iterator i, Args&&... args) { m.emplace_hint(i, std::forward(args)...); }; + +using Map = std::flat_map; +static_assert(CanEmplaceHint); +static_assert(CanEmplaceHint); +static_assert(CanEmplaceHint, std::tuple>); +static_assert(!CanEmplaceHint); +static_assert(!CanEmplaceHint); +#endif + +template +void test_simple() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using R = M::iterator; + M m; + ASSERT_SAME_TYPE(decltype(m.emplace_hint(m.cbegin())), R); + R r = m.emplace_hint(m.end(), typename M::value_type(2, 3.5)); + assert(r == m.begin()); + assert(m.size() == 1); + assert(m.begin()->first == 2); + assert(m.begin()->second == 3.5); +} + +template +void test_emplaceable() { + using M = std::flat_map, KeyContainer, ValueContainer>; + using R = M::iterator; + + M m; + ASSERT_SAME_TYPE(decltype(m.emplace_hint(m.cbegin())), R); + R r = m.emplace_hint(m.end(), std::piecewise_construct, std::forward_as_tuple(2), std::forward_as_tuple()); + assert(r == m.begin()); + assert(m.size() == 1); + assert(m.begin()->first == 2); + assert(m.begin()->second == Emplaceable()); + r = m.emplace_hint(m.end(), std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5)); + assert(r == m.begin()); + assert(m.size() == 2); + assert(m.begin()->first == 1); + assert(m.begin()->second == Emplaceable(2, 3.5)); + r = m.emplace_hint(m.end(), std::piecewise_construct, std::forward_as_tuple(1), 
std::forward_as_tuple(2, 3.5)); + assert(r == m.begin()); + assert(m.size() == 2); + assert(m.begin()->first == 1); + assert(m.begin()->second == Emplaceable(2, 3.5)); +} + +int main(int, char**) { + test_simple, std::vector>(); + test_simple, std::vector>(); + test_simple, MinSequenceContainer>(); + test_simple>, std::vector>>(); + + test_emplaceable, std::vector>(); + test_emplaceable, std::vector>(); + test_emplaceable, MinSequenceContainer>(); + test_emplaceable>, std::vector>>(); + + { + auto emplace_func = [](auto& m, auto key_arg, auto value_arg) { + m.emplace_hint(m.begin(), std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg)); + }; + test_emplace_exception_guarantee(emplace_func); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter.pass.cpp new file mode 100644 index 000000000000000..914e8b676a65682 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter.pass.cpp @@ -0,0 +1,151 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// iterator erase(iterator position); +// iterator erase(const_iterator position); + +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using P = std::pair; + using I = M::iterator; + + P ar[] = { + P(1, 1.5), + P(2, 2.5), + P(3, 3.5), + P(4, 4.5), + P(5, 5.5), + P(6, 6.5), + P(7, 7.5), + P(8, 8.5), + }; + M m(ar, ar + sizeof(ar) / sizeof(ar[0])); + assert(m.size() == 8); + std::same_as decltype(auto) i1 = m.erase(std::next(m.cbegin(), 3)); + assert(m.size() == 7); + assert(i1 == std::next(m.begin(), 3)); + assert(m.begin()->first == 1); + assert(m.begin()->second == 1.5); + assert(std::next(m.begin())->first == 2); + assert(std::next(m.begin())->second == 2.5); + assert(std::next(m.begin(), 2)->first == 3); + assert(std::next(m.begin(), 2)->second == 3.5); + assert(std::next(m.begin(), 3)->first == 5); + assert(std::next(m.begin(), 3)->second == 5.5); + assert(std::next(m.begin(), 4)->first == 6); + assert(std::next(m.begin(), 4)->second == 6.5); + assert(std::next(m.begin(), 5)->first == 7); + assert(std::next(m.begin(), 5)->second == 7.5); + assert(std::next(m.begin(), 6)->first == 8); + assert(std::next(m.begin(), 6)->second == 8.5); + + std::same_as decltype(auto) i2 = m.erase(std::next(m.begin(), 0)); + assert(m.size() == 6); + assert(i2 == m.begin()); + assert(m.begin()->first == 2); + assert(m.begin()->second == 2.5); + assert(std::next(m.begin())->first == 3); + assert(std::next(m.begin())->second == 3.5); + 
assert(std::next(m.begin(), 2)->first == 5); + assert(std::next(m.begin(), 2)->second == 5.5); + assert(std::next(m.begin(), 3)->first == 6); + assert(std::next(m.begin(), 3)->second == 6.5); + assert(std::next(m.begin(), 4)->first == 7); + assert(std::next(m.begin(), 4)->second == 7.5); + assert(std::next(m.begin(), 5)->first == 8); + assert(std::next(m.begin(), 5)->second == 8.5); + + std::same_as decltype(auto) i3 = m.erase(std::next(m.cbegin(), 5)); + assert(m.size() == 5); + assert(i3 == m.end()); + assert(m.begin()->first == 2); + assert(m.begin()->second == 2.5); + assert(std::next(m.begin())->first == 3); + assert(std::next(m.begin())->second == 3.5); + assert(std::next(m.begin(), 2)->first == 5); + assert(std::next(m.begin(), 2)->second == 5.5); + assert(std::next(m.begin(), 3)->first == 6); + assert(std::next(m.begin(), 3)->second == 6.5); + assert(std::next(m.begin(), 4)->first == 7); + assert(std::next(m.begin(), 4)->second == 7.5); + + std::same_as decltype(auto) i4 = m.erase(std::next(m.begin(), 1)); + assert(m.size() == 4); + assert(i4 == std::next(m.begin())); + assert(m.begin()->first == 2); + assert(m.begin()->second == 2.5); + assert(std::next(m.begin())->first == 5); + assert(std::next(m.begin())->second == 5.5); + assert(std::next(m.begin(), 2)->first == 6); + assert(std::next(m.begin(), 2)->second == 6.5); + assert(std::next(m.begin(), 3)->first == 7); + assert(std::next(m.begin(), 3)->second == 7.5); + + std::same_as decltype(auto) i5 = m.erase(std::next(m.cbegin(), 2)); + assert(m.size() == 3); + assert(i5 == std::next(m.begin(), 2)); + assert(m.begin()->first == 2); + assert(m.begin()->second == 2.5); + assert(std::next(m.begin())->first == 5); + assert(std::next(m.begin())->second == 5.5); + assert(std::next(m.begin(), 2)->first == 7); + assert(std::next(m.begin(), 2)->second == 7.5); + + std::same_as decltype(auto) i6 = m.erase(std::next(m.begin(), 2)); + assert(m.size() == 2); + assert(i6 == std::next(m.begin(), 2)); + 
assert(m.begin()->first == 2); + assert(m.begin()->second == 2.5); + assert(std::next(m.begin())->first == 5); + assert(std::next(m.begin())->second == 5.5); + + std::same_as decltype(auto) i7 = m.erase(std::next(m.cbegin(), 0)); + assert(m.size() == 1); + assert(i7 == std::next(m.begin(), 0)); + assert(m.begin()->first == 5); + assert(m.begin()->second == 5.5); + + std::same_as decltype(auto) i8 = m.erase(m.begin()); + assert(m.size() == 0); + assert(i8 == m.begin()); + assert(i8 == m.end()); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto erase_function = [](auto& m, auto) { m.erase(m.begin() + 2); }; + test_erase_exception_guarantee(erase_function); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter_iter.pass.cpp new file mode 100644 index 000000000000000..0bc92082940291b --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter_iter.pass.cpp @@ -0,0 +1,109 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// iterator erase(const_iterator first, const_iterator last); + +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using P = std::pair; + using I = M::iterator; + + P ar[] = { + P(1, 1.5), + P(2, 2.5), + P(3, 3.5), + P(4, 4.5), + P(5, 5.5), + P(6, 6.5), + P(7, 7.5), + P(8, 8.5), + }; + M m(ar, ar + sizeof(ar) / sizeof(ar[0])); + assert(m.size() == 8); + std::same_as decltype(auto) i1 = m.erase(m.cbegin(), m.cbegin()); + assert(m.size() == 8); + assert(i1 == m.begin()); + assert(m.begin()->first == 1); + assert(m.begin()->second == 1.5); + assert(std::next(m.begin())->first == 2); + assert(std::next(m.begin())->second == 2.5); + assert(std::next(m.begin(), 2)->first == 3); + assert(std::next(m.begin(), 2)->second == 3.5); + assert(std::next(m.begin(), 3)->first == 4); + assert(std::next(m.begin(), 3)->second == 4.5); + assert(std::next(m.begin(), 4)->first == 5); + assert(std::next(m.begin(), 4)->second == 5.5); + assert(std::next(m.begin(), 5)->first == 6); + assert(std::next(m.begin(), 5)->second == 6.5); + assert(std::next(m.begin(), 6)->first == 7); + assert(std::next(m.begin(), 6)->second == 7.5); + assert(std::next(m.begin(), 7)->first == 8); + assert(std::next(m.begin(), 7)->second == 8.5); + + std::same_as decltype(auto) i2 = m.erase(m.cbegin(), std::next(m.cbegin(), 2)); + assert(m.size() == 6); + assert(i2 == m.begin()); + assert(std::next(m.begin(), 0)->first == 3); + assert(std::next(m.begin(), 0)->second == 3.5); + 
assert(std::next(m.begin(), 1)->first == 4); + assert(std::next(m.begin(), 1)->second == 4.5); + assert(std::next(m.begin(), 2)->first == 5); + assert(std::next(m.begin(), 2)->second == 5.5); + assert(std::next(m.begin(), 3)->first == 6); + assert(std::next(m.begin(), 3)->second == 6.5); + assert(std::next(m.begin(), 4)->first == 7); + assert(std::next(m.begin(), 4)->second == 7.5); + assert(std::next(m.begin(), 5)->first == 8); + assert(std::next(m.begin(), 5)->second == 8.5); + + std::same_as decltype(auto) i3 = m.erase(std::next(m.cbegin(), 2), std::next(m.cbegin(), 6)); + assert(m.size() == 2); + assert(i3 == std::next(m.begin(), 2)); + assert(std::next(m.begin(), 0)->first == 3); + assert(std::next(m.begin(), 0)->second == 3.5); + assert(std::next(m.begin(), 1)->first == 4); + assert(std::next(m.begin(), 1)->second == 4.5); + + std::same_as decltype(auto) i4 = m.erase(m.cbegin(), m.cend()); + assert(m.size() == 0); + assert(i4 == m.begin()); + assert(i4 == m.end()); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto erase_function = [](auto& m, auto) { m.erase(m.begin(), m.begin() + 2); }; + test_erase_exception_guarantee(erase_function); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key.pass.cpp new file mode 100644 index 000000000000000..ef57b1cb5512d57 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key.pass.cpp @@ -0,0 +1,91 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// size_type erase(const key_type& k); + +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +template > +void test() { + using M = std::flat_map; + + auto make = [](std::initializer_list il) { + M m; + for (int i : il) { + m.emplace(i, i); + } + return m; + }; + M m = make({1, 2, 3, 4, 5, 6, 7, 8}); + ASSERT_SAME_TYPE(decltype(m.erase(9)), typename M::size_type); + auto n = m.erase(9); + assert(n == 0); + assert(m == make({1, 2, 3, 4, 5, 6, 7, 8})); + n = m.erase(4); + assert(n == 1); + assert(m == make({1, 2, 3, 5, 6, 7, 8})); + n = m.erase(1); + assert(n == 1); + assert(m == make({2, 3, 5, 6, 7, 8})); + n = m.erase(8); + assert(n == 1); + assert(m == make({2, 3, 5, 6, 7})); + n = m.erase(3); + assert(n == 1); + assert(m == make({2, 5, 6, 7})); + n = m.erase(4); + assert(n == 0); + assert(m == make({2, 5, 6, 7})); + n = m.erase(6); + assert(n == 1); + assert(m == make({2, 5, 7})); + n = m.erase(7); + assert(n == 1); + assert(m == make({2, 5})); + n = m.erase(2); + assert(n == 1); + assert(m == make({5})); + n = m.erase(5); + assert(n == 1); + assert(m.empty()); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector, std::greater<>>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto erase_function = [](auto& m, auto key_arg) { + using Map = std::decay_t; + using Key = typename Map::key_type; + const Key key{key_arg}; + m.erase(key); + }; + test_erase_exception_guarantee(erase_function); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key_transparent.pass.cpp 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key_transparent.pass.cpp new file mode 100644 index 000000000000000..3ba30757bf2c7d8 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key_transparent.pass.cpp @@ -0,0 +1,144 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// size_type erase(K&& k); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type. 
+template +concept CanErase = requires(M m, Transparent k) { m.erase(k); }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +static_assert(CanErase); +static_assert(!CanErase); +static_assert(!CanErase); +static_assert(!CanErase); + +template +struct HeterogeneousKey { + explicit HeterogeneousKey(Key key, It it) : key_(key), it_(it) {} + operator It() && { return it_; } + auto operator<=>(Key key) const { return key_ <=> key; } + friend bool operator<(const HeterogeneousKey&, const HeterogeneousKey&) { + assert(false); + return false; + } + Key key_; + It it_; +}; + +template +void test_simple() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + M m = {{1, 1}, {2, 2}, {3, 3}, {4, 4}}; + ASSERT_SAME_TYPE(decltype(m.erase(9)), typename M::size_type); + auto n = m.erase(3); // erase(K&&) [with K=int] + assert(n == 1); + assert((m == M{{1, 1}, {2, 2}, {4, 4}})); + typename M::key_type lvalue = 2; + n = m.erase(lvalue); // erase(K&&) [with K=int&] + assert(n == 1); + assert((m == M{{1, 1}, {4, 4}})); + const typename M::key_type const_lvalue = 1; + n = m.erase(const_lvalue); // erase(const key_type&) + assert(n == 1); + assert((m == M{{4, 4}})); +} + +template +void test_transparent_comparator() { + using M = std::flat_map; + M m = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}}; + ASSERT_SAME_TYPE(decltype(m.erase(Transparent{"abc"})), typename M::size_type); + + auto n = m.erase(Transparent{"epsilon"}); + assert(n == 1); + + M expected = {{"alpha", 1}, {"beta", 2}, {"eta", 4}, {"gamma", 5}}; + assert(m == expected); + + auto n2 = m.erase(Transparent{"aaa"}); + assert(n2 == 0); + assert(m == expected); +} + +int main(int, char**) { + test_simple, std::vector>(); + test_simple, std::vector>(); + test_simple, MinSequenceContainer>(); + test_simple>, std::vector>>(); + + test_transparent_comparator, 
std::vector>(); + test_transparent_comparator, std::vector>(); + test_transparent_comparator, MinSequenceContainer>(); + test_transparent_comparator>, + std::vector>>(); + + { + // P2077's HeterogeneousKey example + using M = std::flat_map>; + M m = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}}; + auto h1 = HeterogeneousKey(8, m.begin()); + std::same_as auto n = m.erase(h1); // lvalue is not convertible to It; erase(K&&) is the best match + assert(n == 1); + assert((m == M{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}})); + std::same_as auto it = m.erase(std::move(h1)); // rvalue is convertible to It; erase(K&&) drops out + assert(it == m.begin()); + assert((m == M{{2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}})); + } + { + using M = std::flat_map>; + M m = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}}; + auto h1 = HeterogeneousKey(8, m.begin()); + std::same_as auto n = m.erase(h1); // lvalue is not convertible to It; erase(K&&) is the best match + assert(n == 1); + assert((m == M{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}})); + std::same_as auto it = m.erase(std::move(h1)); // rvalue is convertible to It; erase(K&&) drops out + assert(it == m.begin()); + assert((m == M{{2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}})); + } + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto n = m.erase(Transparent{3}); + assert(n == 1); + assert(transparent_used); + } + { + auto erase_transparent = [](auto& m, auto key_arg) { + using Map = std::decay_t; + using Key = typename Map::key_type; + m.erase(Transparent{key_arg}); + }; + test_erase_exception_guarantee(erase_transparent); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/extract.pass.cpp 
b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/extract.pass.cpp new file mode 100644 index 000000000000000..d8e4ce94efb9e98 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/extract.pass.cpp @@ -0,0 +1,91 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// containers extract() &&; + +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +concept CanExtract = requires(T&& t) { std::forward(t).extract(); }; + +static_assert(CanExtract&&>); +static_assert(!CanExtract&>); +static_assert(!CanExtract const&>); +static_assert(!CanExtract const&&>); + +template +void test() { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = M({1, 2, 3}, {4, 5, 6}); + + std::same_as auto containers = std::move(m).extract(); + + auto expected_keys = {1, 2, 3}; + auto expected_values = {4, 5, 6}; + assert(std::ranges::equal(containers.keys, expected_keys)); + assert(std::ranges::equal(containers.values, expected_values)); + check_invariant(m); + LIBCPP_ASSERT(m.empty()); + LIBCPP_ASSERT(m.keys().size() == 0); + LIBCPP_ASSERT(m.values().size() == 0); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + { + // extracted object maintains invariant if one of underlying container does not clear after move + using M = std::flat_map, std::vector, CopyOnlyVector>; + M m = M({1, 2, 3}, {1, 2, 3}); + std::same_as 
auto containers = std::move(m).extract(); + assert(containers.keys.size() == 3); + assert(containers.values.size() == 3); + check_invariant(m); + LIBCPP_ASSERT(m.empty()); + LIBCPP_ASSERT(m.keys().size() == 0); + LIBCPP_ASSERT(m.values().size() == 0); + } + + { +#ifndef TEST_HAS_NO_EXCEPTIONS + using KeyContainer = std::vector; + using ValueContainer = ThrowOnMoveContainer; + using M = std::flat_map; + + M m; + m.emplace(1, 1); + m.emplace(2, 2); + try { + auto c = std::move(m).extract(); + assert(false); + } catch (int) { + check_invariant(m); + // In libc++, we try to erase the key after value emplacement failure. + // and after erasure failure, we clear the flat_map + LIBCPP_ASSERT(m.size() == 0); + } +#endif + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_cv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_cv.pass.cpp new file mode 100644 index 000000000000000..7e667c4e4877bff --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_cv.pass.cpp @@ -0,0 +1,83 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// pair insert(const value_type& v); + +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "../helpers.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using R = std::pair; + using VT = typename M::value_type; + M m; + + const VT v1(2, 2.5); + std::same_as decltype(auto) r = m.insert(v1); + assert(r.second); + assert(r.first == m.begin()); + assert(m.size() == 1); + assert(r.first->first == 2); + assert(r.first->second == 2.5); + + const VT v2(1, 1.5); + r = m.insert(v2); + assert(r.second); + assert(r.first == m.begin()); + assert(m.size() == 2); + assert(r.first->first == 1); + assert(r.first->second == 1.5); + + const VT v3(3, 3.5); + r = m.insert(v3); + assert(r.second); + assert(r.first == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r.first->first == 3); + assert(r.first->second == 3.5); + + const VT v4(3, 4.5); + r = m.insert(v4); + assert(!r.second); + assert(r.first == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r.first->first == 3); + assert(r.first->second == 3.5); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto insert_func = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + using value_type = typename FlatMap::value_type; + const value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg)); + m.insert(p); + }; + test_emplace_exception_guarantee(insert_func); + } + return 0; +} diff --git 
a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_initializer_list.pass.cpp new file mode 100644 index 000000000000000..32be3ab8a95b3d2 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_initializer_list.pass.cpp @@ -0,0 +1,68 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// void insert(initializer_list il); + +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using V = std::pair; + + M m = {{1, 1}, {1, 1.5}, {1, 2}, {3, 1}, {3, 1.5}, {3, 2}}; + m.insert({ + {4, 1}, + {4, 1.5}, + {4, 2}, + {1, 1}, + {1, 1.5}, + {1, 2}, + {2, 1}, + {2, 1.5}, + {2, 2}, + }); + assert(m.size() == 4); + assert(std::distance(m.begin(), m.end()) == 4); + assert(*m.begin() == V(1, 1)); + assert(*std::next(m.begin()) == V(2, 1)); + assert(*std::next(m.begin(), 2) == V(3, 1)); + assert(*std::next(m.begin(), 3) == V(4, 1)); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto insert_func = [](auto& m, const auto& newValues) { + using FlatMap = std::decay_t; + using value_type = typename FlatMap::value_type; + std::initializer_list 
il = {{newValues[0].first, newValues[0].second}}; + m.insert(il); + }; + test_insert_range_exception_guarantee(insert_func); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_cv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_cv.pass.cpp new file mode 100644 index 000000000000000..4bbe0628317dcba --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_cv.pass.cpp @@ -0,0 +1,79 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// iterator insert(const_iterator position, const value_type& v); + +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "../helpers.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using R = typename M::iterator; + using VT = typename M::value_type; + + M m; + const VT v1(2, 2.5); + std::same_as decltype(auto) r = m.insert(m.end(), v1); + assert(r == m.begin()); + assert(m.size() == 1); + assert(r->first == 2); + assert(r->second == 2.5); + + const VT v2(1, 1.5); + r = m.insert(m.end(), v2); + assert(r == m.begin()); + assert(m.size() == 2); + assert(r->first == 1); + assert(r->second == 1.5); + + const VT v3(3, 3.5); + r = m.insert(m.end(), v3); + assert(r == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r->first == 3); + assert(r->second == 3.5); + + 
const VT v4(3, 4.5); + r = m.insert(m.end(), v4); + assert(r == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r->first == 3); + assert(r->second == 3.5); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto insert_func = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + using value_type = typename FlatMap::value_type; + const value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg)); + m.insert(m.begin(), p); + }; + test_emplace_exception_guarantee(insert_func); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_iter.pass.cpp new file mode 100644 index 000000000000000..8455b19475fe43e --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_iter.pass.cpp @@ -0,0 +1,89 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// void insert(InputIterator first, InputIterator last); + +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "test_iterators.h" +#include "min_allocator.h" + +// test constraint InputIterator +template +concept CanInsert = requires(M m, Args&&... 
args) { m.insert(std::forward(args)...); }; + +using Map = std::flat_map; +using Pair = std::pair; + +static_assert(CanInsert); +static_assert(CanInsert, cpp17_input_iterator>); +static_assert(!CanInsert); +static_assert(!CanInsert, cpp20_input_iterator>); + +template +void test() { + using P = std::pair; + using M = std::flat_map, KeyContainer, ValueContainer>; + + P ar1[] = { + P(2, 1), + P(2, 1.5), + P(2, 2), + P(1, 1), + P(1, 1.5), + P(1, 2), + P(3, 1), + P(3, 1.5), + P(3, 2), + }; + P ar2[] = { + P(4, 1), + P(4, 1.5), + P(4, 2), + P(1, 1), + P(1, 1.5), + P(1, 2), + P(0, 1), + P(0, 1.5), + P(0, 2), + }; + + M m; + m.insert(cpp17_input_iterator(ar1), cpp17_input_iterator(ar1 + sizeof(ar1) / sizeof(ar1[0]))); + assert(m.size() == 3); + M expected{{1, 1}, {2, 1}, {3, 1}}; + assert(m == expected); + + m.insert(cpp17_input_iterator(ar2), cpp17_input_iterator(ar2 + sizeof(ar2) / sizeof(ar2[0]))); + assert(m.size() == 5); + M expected2{{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}}; + assert(m == expected2); +} +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto insert_func = [](auto& m, const auto& newValues) { m.insert(newValues.begin(), newValues.end()); }; + test_insert_range_exception_guarantee(insert_func); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_rv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_rv.pass.cpp new file mode 100644 index 000000000000000..034941b55eb80b1 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_rv.pass.cpp @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// +// iterator insert(const_iterator position, value_type&&); + +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "MoveOnly.h" +#include "min_allocator.h" +#include "../helpers.h" +#include "test_macros.h" + +template +void do_insert_iter_rv_test() { + using M = Container; + using P = Pair; + using R = typename M::iterator; + M m; + std::same_as decltype(auto) r = m.insert(m.end(), P(2, 2)); + assert(r == m.begin()); + assert(m.size() == 1); + assert(r->first == 2); + assert(r->second == 2); + + r = m.insert(m.end(), P(1, 1)); + assert(r == m.begin()); + assert(m.size() == 2); + assert(r->first == 1); + assert(r->second == 1); + + r = m.insert(m.end(), P(3, 3)); + assert(r == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r->first == 3); + assert(r->second == 3); + + r = m.insert(m.end(), P(3, 4)); + assert(r == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r->first == 3); + assert(r->second == 3); +} + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using P = std::pair; + using CP = std::pair; + + do_insert_iter_rv_test(); + do_insert_iter_rv_test(); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, std::deque>(); + test, std::deque>(); + test, MinSequenceContainer>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + test>, std::vector>>(); + + { + auto insert_func = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + using value_type = typename FlatMap::value_type; + value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg)); + m.insert(m.begin(), std::move(p)); + }; + 
test_emplace_exception_guarantee(insert_func); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign.pass.cpp new file mode 100644 index 000000000000000..398a7a1a4052e08 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign.pass.cpp @@ -0,0 +1,326 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "MoveOnly.h" +#include "min_allocator.h" +#include "test_macros.h" +#include "../helpers.h" + +// template +// pair insert_or_assign(const key_type& k, M&& obj); +// template +// pair insert_or_assign(key_type&& k, M&& obj); +// template +// iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj); +// template +// iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj); + +// Constraints: is_assignable_v is true and is_constructible_v is true. 
+template +concept CanInsertOrAssign = + requires(Map map, K&& k, M&& m) { map.insert_or_assign(std::forward(k), std::forward(m)); }; + +template +concept CanInsertOrAssignIter = requires(Map map, typename Map::const_iterator iter, K&& k, M&& m) { + map.insert_or_assign(iter, std::forward(k), std::forward(m)); +}; + +template +struct ConstructAndAssignFrom { + explicit ConstructAndAssignFrom(From); + ConstructAndAssignFrom& operator=(From); +}; + +template +struct ConstructFrom { + explicit ConstructFrom(From); +}; + +template +struct AssignFrom { + AssignFrom& operator=(From); +}; + +struct V {}; + +static_assert(CanInsertOrAssign>, const int&, V>); +static_assert(!CanInsertOrAssign>, const int&, int>); +static_assert(!CanInsertOrAssign>, const int&, V>); +static_assert(!CanInsertOrAssign>, const int&, V>); + +static_assert(CanInsertOrAssign>, int&&, V>); +static_assert(!CanInsertOrAssign>, int&&, int>); +static_assert(!CanInsertOrAssign>, int&&, V>); +static_assert(!CanInsertOrAssign>, int&&, V>); + +static_assert(CanInsertOrAssignIter>, const int&, V>); +static_assert(!CanInsertOrAssignIter>, const int&, int>); +static_assert(!CanInsertOrAssignIter>, const int&, V>); +static_assert(!CanInsertOrAssignIter>, const int&, V>); + +static_assert(CanInsertOrAssignIter>, int&&, V>); +static_assert(!CanInsertOrAssignIter>, int&&, int>); +static_assert(!CanInsertOrAssignIter>, int&&, V>); +static_assert(!CanInsertOrAssignIter>, int&&, V>); + +template +void test_cv_key() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + { // pair insert_or_assign(const key_type& k, M&& obj); + using R = std::pair; + M m; + for (int i = 0; i < 20; i += 2) + m.emplace(i, Moveable(i, (double)i)); + assert(m.size() == 10); + + for (int i = 0; i < 20; i += 2) { + Moveable mv(i + 1, i + 1); + std::same_as decltype(auto) r1 = m.insert_or_assign(i, std::move(mv)); + assert(m.size() == 10); + assert(!r1.second); // 
was not inserted + assert(mv.moved()); // was moved from + assert(r1.first->first == i); // key + assert(r1.first->second.get() == i + 1); // value + } + + Moveable mv1(5, 5.0); + std::same_as decltype(auto) r2 = m.insert_or_assign(-1, std::move(mv1)); + assert(m.size() == 11); + assert(r2.second); // was inserted + assert(mv1.moved()); // was moved from + assert(r2.first->first == -1); // key + assert(r2.first->second.get() == 5); // value + + Moveable mv2(9, 9.0); + std::same_as decltype(auto) r3 = m.insert_or_assign(3, std::move(mv2)); + assert(m.size() == 12); + assert(r3.second); // was inserted + assert(mv2.moved()); // was moved from + assert(r3.first->first == 3); // key + assert(r3.first->second.get() == 9); // value + + Moveable mv3(-1, 5.0); + std::same_as decltype(auto) r4 = m.insert_or_assign(117, std::move(mv3)); + assert(m.size() == 13); + assert(r4.second); // was inserted + assert(mv3.moved()); // was moved from + assert(r4.first->first == 117); // key + assert(r4.first->second.get() == -1); // value + } + + { // iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj); + M m; + using R = M::iterator; + for (int i = 0; i < 20; i += 2) + m.emplace(i, Moveable(i, (double)i)); + assert(m.size() == 10); + typename M::const_iterator it = m.find(2); + + Moveable mv1(3, 3.0); + std::same_as decltype(auto) r1 = m.insert_or_assign(it, 2, std::move(mv1)); + assert(m.size() == 10); + assert(mv1.moved()); // was moved from + assert(r1->first == 2); // key + assert(r1->second.get() == 3); // value + + Moveable mv2(5, 5.0); + std::same_as decltype(auto) r2 = m.insert_or_assign(it, 3, std::move(mv2)); + assert(m.size() == 11); + assert(mv2.moved()); // was moved from + assert(r2->first == 3); // key + assert(r2->second.get() == 5); // value + + // wrong hint: begin() + Moveable mv3(7, 7.0); + std::same_as decltype(auto) r3 = m.insert_or_assign(m.begin(), 4, std::move(mv3)); + assert(m.size() == 11); + assert(mv3.moved()); // was moved from + 
assert(r3->first == 4); // key + assert(r3->second.get() == 7); // value + + Moveable mv4(9, 9.0); + std::same_as decltype(auto) r4 = m.insert_or_assign(m.begin(), 5, std::move(mv4)); + assert(m.size() == 12); + assert(mv4.moved()); // was moved from + assert(r4->first == 5); // key + assert(r4->second.get() == 9); // value + + // wrong hint: end() + Moveable mv5(11, 11.0); + std::same_as decltype(auto) r5 = m.insert_or_assign(m.end(), 6, std::move(mv5)); + assert(m.size() == 12); + assert(mv5.moved()); // was moved from + assert(r5->first == 6); // key + assert(r5->second.get() == 11); // value + + Moveable mv6(13, 13.0); + std::same_as decltype(auto) r6 = m.insert_or_assign(m.end(), 7, std::move(mv6)); + assert(m.size() == 13); + assert(mv6.moved()); // was moved from + assert(r6->first == 7); // key + assert(r6->second.get() == 13); // value + + // wrong hint: third element + Moveable mv7(15, 15.0); + std::same_as decltype(auto) r7 = m.insert_or_assign(std::next(m.begin(), 2), 8, std::move(mv7)); + assert(m.size() == 13); + assert(mv7.moved()); // was moved from + assert(r7->first == 8); // key + assert(r7->second.get() == 15); // value + + Moveable mv8(17, 17.0); + std::same_as decltype(auto) r8 = m.insert_or_assign(std::next(m.begin(), 2), 9, std::move(mv8)); + assert(m.size() == 14); + assert(mv8.moved()); // was moved from + assert(r8->first == 9); // key + assert(r8->second.get() == 17); // value + } +} + +template +void test_rv_key() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + { // pair insert_or_assign(key_type&& k, M&& obj); + using R = std::pair; + M m; + for (int i = 0; i < 20; i += 2) + m.emplace(Moveable(i, (double)i), Moveable(i + 1, (double)i + 1)); + assert(m.size() == 10); + + Moveable mvkey1(2, 2.0); + Moveable mv1(4, 4.0); + std::same_as decltype(auto) r1 = m.insert_or_assign(std::move(mvkey1), std::move(mv1)); + assert(m.size() == 10); + 
assert(!r1.second); // was not inserted + assert(!mvkey1.moved()); // was not moved from + assert(mv1.moved()); // was moved from + assert(r1.first->first == mvkey1); // key + assert(r1.first->second.get() == 4); // value + + Moveable mvkey2(3, 3.0); + Moveable mv2(5, 5.0); + std::same_as decltype(auto) r2 = m.insert_or_assign(std::move(mvkey2), std::move(mv2)); + assert(m.size() == 11); + assert(r2.second); // was inserted + assert(mv2.moved()); // was moved from + assert(mvkey2.moved()); // was moved from + assert(r2.first->first.get() == 3); // key + assert(r2.first->second.get() == 5); // value + } + { // iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj); + using R = M::iterator; + M m; + for (int i = 0; i < 20; i += 2) + m.emplace(Moveable(i, (double)i), Moveable(i + 1, (double)i + 1)); + assert(m.size() == 10); + typename M::const_iterator it = std::next(m.cbegin()); + + Moveable mvkey1(2, 2.0); + Moveable mv1(4, 4.0); + std::same_as decltype(auto) r1 = m.insert_or_assign(it, std::move(mvkey1), std::move(mv1)); + assert(m.size() == 10); + assert(mv1.moved()); // was moved from + assert(!mvkey1.moved()); // was not moved from + assert(r1->first == mvkey1); // key + assert(r1->second.get() == 4); // value + + Moveable mvkey2(3, 3.0); + Moveable mv2(5, 5.0); + std::same_as decltype(auto) r2 = m.insert_or_assign(it, std::move(mvkey2), std::move(mv2)); + assert(m.size() == 11); + assert(mv2.moved()); // was moved from + assert(mvkey2.moved()); // was moved from + assert(r2->first.get() == 3); // key + assert(r2->second.get() == 5); // value + + // wrong hint: begin() + Moveable mvkey3(6, 6.0); + Moveable mv3(8, 8.0); + std::same_as decltype(auto) r3 = m.insert_or_assign(m.begin(), std::move(mvkey3), std::move(mv3)); + assert(m.size() == 11); + assert(mv3.moved()); // was moved from + assert(!mvkey3.moved()); // was not moved from + assert(r3->first == mvkey3); // key + assert(r3->second.get() == 8); // value + + Moveable mvkey4(7, 7.0); + Moveable
mv4(9, 9.0); + std::same_as decltype(auto) r4 = m.insert_or_assign(m.begin(), std::move(mvkey4), std::move(mv4)); + assert(m.size() == 12); + assert(mv4.moved()); // was moved from + assert(mvkey4.moved()); // was moved from + assert(r4->first.get() == 7); // key + assert(r4->second.get() == 9); // value + + // wrong hint: end() + Moveable mvkey5(8, 8.0); + Moveable mv5(10, 10.0); + std::same_as decltype(auto) r5 = m.insert_or_assign(m.end(), std::move(mvkey5), std::move(mv5)); + assert(m.size() == 12); + assert(mv5.moved()); // was moved from + assert(!mvkey5.moved()); // was not moved from + assert(r5->first == mvkey5); // key + assert(r5->second.get() == 10); // value + + Moveable mvkey6(9, 9.0); + Moveable mv6(11, 11.0); + std::same_as decltype(auto) r6 = m.insert_or_assign(m.end(), std::move(mvkey6), std::move(mv6)); + assert(m.size() == 13); + assert(mv6.moved()); // was moved from + assert(mvkey6.moved()); // was moved from + assert(r6->first.get() == 9); // key + assert(r6->second.get() == 11); // value + + // wrong hint: third element + Moveable mvkey7(10, 10.0); + Moveable mv7(12, 12.0); + std::same_as decltype(auto) r7 = m.insert_or_assign(std::next(m.begin(), 2), std::move(mvkey7), std::move(mv7)); + assert(m.size() == 13); + assert(mv7.moved()); // was moved from + assert(!mvkey7.moved()); // was not moved from + assert(r7->first == mvkey7); // key + assert(r7->second.get() == 12); // value + + Moveable mvkey8(11, 11.0); + Moveable mv8(13, 13.0); + std::same_as decltype(auto) r8 = m.insert_or_assign(std::next(m.begin(), 2), std::move(mvkey8), std::move(mv8)); + assert(m.size() == 14); + assert(mv8.moved()); // was moved from + assert(mvkey8.moved()); // was moved from + assert(r8->first.get() == 11); // key + assert(r8->second.get() == 13); // value + } +} + +int main(int, char**) { + test_cv_key, std::vector>(); + test_cv_key, std::vector>(); + test_cv_key, MinSequenceContainer>(); + test_cv_key>, std::vector>>(); + + test_rv_key, std::vector>(); + 
test_rv_key, std::vector>(); + test_rv_key, MinSequenceContainer>(); + test_rv_key>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign_transparent.pass.cpp new file mode 100644 index 000000000000000..636c4edfe551de8 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign_transparent.pass.cpp @@ -0,0 +1,259 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "MoveOnly.h" +#include "min_allocator.h" +#include "test_macros.h" +#include "../helpers.h" + +// template +// pair insert_or_assign(K&& k, M&& obj); +// template +// iterator insert_or_assign(const_iterator hint, K&& k, M&& obj); + +// Constraints: +// The qualified-id Compare::is_transparent is valid and denotes a type. +// is_constructible_v is true. +// is_assignable_v is true. +// is_constructible_v is true. 
+ +template +concept CanInsertOrAssign = + requires(Map map, K&& k, M&& m) { map.insert_or_assign(std::forward(k), std::forward(m)); }; + +template +concept CanInsertOrAssignIter = requires(Map map, typename Map::const_iterator iter, K&& k, M&& m) { + map.insert_or_assign(iter, std::forward(k), std::forward(m)); +}; + +template +struct ConstructAndAssignFrom { + explicit ConstructAndAssignFrom(From); + ConstructAndAssignFrom& operator=(From); +}; + +template +struct ConstructFrom { + explicit ConstructFrom(From); +}; + +template +struct AssignFrom { + AssignFrom& operator=(From); +}; + +struct V {}; + +static_assert(CanInsertOrAssign, TransparentComparator>, + ConvertibleTransparent, + V>); +static_assert(!CanInsertOrAssign, TransparentComparator>, + NonConvertibleTransparent, + V>); +static_assert(!CanInsertOrAssign, NonTransparentComparator>, + NonConvertibleTransparent, + V>); +static_assert(!CanInsertOrAssign, TransparentComparator>, + ConvertibleTransparent, + int>); +static_assert( + !CanInsertOrAssign, TransparentComparator>, ConvertibleTransparent, V>); +static_assert( + !CanInsertOrAssign, TransparentComparator>, ConvertibleTransparent, V>); + +static_assert(CanInsertOrAssignIter, TransparentComparator>, + ConvertibleTransparent, + V>); +static_assert(!CanInsertOrAssignIter, TransparentComparator>, + NonConvertibleTransparent, + V>); +static_assert(!CanInsertOrAssignIter, NonTransparentComparator>, + NonConvertibleTransparent, + V>); +static_assert(!CanInsertOrAssignIter, TransparentComparator>, + ConvertibleTransparent, + int>); +static_assert(!CanInsertOrAssignIter, TransparentComparator>, + ConvertibleTransparent, + V>); +static_assert( + !CanInsertOrAssignIter, TransparentComparator>, ConvertibleTransparent, V>); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + { + // pair insert_or_assign(K&& k, M&& obj); + using R = std::pair; + M
m; + for (int i = 0; i < 20; i += 2) + m.emplace(i, Moveable(i, (double)i)); + assert(m.size() == 10); + + for (int i = 0; i < 20; i += 2) { + Moveable mv(i + 1, i + 1); + std::same_as decltype(auto) r1 = m.insert_or_assign(ConvertibleTransparent{i}, std::move(mv)); + assert(m.size() == 10); + assert(!r1.second); // was not inserted + assert(mv.moved()); // was moved from + assert(r1.first->first == i); // key + assert(r1.first->second.get() == i + 1); // value + } + + Moveable mv1(5, 5.0); + std::same_as decltype(auto) r2 = m.insert_or_assign(ConvertibleTransparent{-1}, std::move(mv1)); + assert(m.size() == 11); + assert(r2.second); // was inserted + assert(mv1.moved()); // was moved from + assert(r2.first->first == -1); // key + assert(r2.first->second.get() == 5); // value + + Moveable mv2(9, 9.0); + std::same_as decltype(auto) r3 = m.insert_or_assign(ConvertibleTransparent{3}, std::move(mv2)); + assert(m.size() == 12); + assert(r3.second); // was inserted + assert(mv2.moved()); // was moved from + assert(r3.first->first == 3); // key + assert(r3.first->second.get() == 9); // value + + Moveable mv3(-1, 5.0); + std::same_as decltype(auto) r4 = m.insert_or_assign(ConvertibleTransparent{117}, std::move(mv3)); + assert(m.size() == 13); + assert(r4.second); // was inserted + assert(mv3.moved()); // was moved from + assert(r4.first->first == 117); // key + assert(r4.first->second.get() == -1); // value + } + { + // iterator insert_or_assign(const_iterator hint, K&& k, M&& obj); + using R = M::iterator; + M m; + for (int i = 0; i < 20; i += 2) + m.emplace(i, Moveable(i, (double)i)); + assert(m.size() == 10); + typename M::const_iterator it = m.find(2); + + Moveable mv1(3, 3.0); + std::same_as decltype(auto) r1 = m.insert_or_assign(it, ConvertibleTransparent{2}, std::move(mv1)); + assert(m.size() == 10); + assert(mv1.moved()); // was moved from + assert(r1->first == 2); // key + assert(r1->second.get() == 3); // value + + Moveable mv2(5, 5.0); + std::same_as
decltype(auto) r2 = m.insert_or_assign(it, ConvertibleTransparent{3}, std::move(mv2)); + assert(m.size() == 11); + assert(mv2.moved()); // was moved from + assert(r2->first == 3); // key + assert(r2->second.get() == 5); // value + + // wrong hint: begin() + Moveable mv3(7, 7.0); + std::same_as decltype(auto) r3 = m.insert_or_assign(m.begin(), ConvertibleTransparent{4}, std::move(mv3)); + assert(m.size() == 11); + assert(mv3.moved()); // was moved from + assert(r3->first == 4); // key + assert(r3->second.get() == 7); // value + + Moveable mv4(9, 9.0); + std::same_as decltype(auto) r4 = m.insert_or_assign(m.begin(), ConvertibleTransparent{5}, std::move(mv4)); + assert(m.size() == 12); + assert(mv4.moved()); // was moved from + assert(r4->first == 5); // key + assert(r4->second.get() == 9); // value + + // wrong hint: end() + Moveable mv5(11, 11.0); + std::same_as decltype(auto) r5 = m.insert_or_assign(m.end(), ConvertibleTransparent{6}, std::move(mv5)); + assert(m.size() == 12); + assert(mv5.moved()); // was moved from + assert(r5->first == 6); // key + assert(r5->second.get() == 11); // value + + Moveable mv6(13, 13.0); + std::same_as decltype(auto) r6 = m.insert_or_assign(m.end(), ConvertibleTransparent{7}, std::move(mv6)); + assert(m.size() == 13); + assert(mv6.moved()); // was moved from + assert(r6->first == 7); // key + assert(r6->second.get() == 13); // value + + // wrong hint: third element + Moveable mv7(15, 15.0); + std::same_as decltype(auto) r7 = + m.insert_or_assign(std::next(m.begin(), 2), ConvertibleTransparent{8}, std::move(mv7)); + assert(m.size() == 13); + assert(mv7.moved()); // was moved from + assert(r7->first == 8); // key + assert(r7->second.get() == 15); // value + + Moveable mv8(17, 17.0); + std::same_as decltype(auto) r8 = + m.insert_or_assign(std::next(m.begin(), 2), ConvertibleTransparent{9}, std::move(mv8)); + assert(m.size() == 14); + assert(mv8.moved()); // was moved from + assert(r8->first == 9); // key + assert(r8->second.get() == 
17); // value + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto p = m.insert_or_assign(ConvertibleTransparent{3}, 5); + assert(!p.second); + assert(transparent_used); + } + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto it = m.insert_or_assign(m.begin(), ConvertibleTransparent{3}, 5); + assert(it->second == 5); + assert(transparent_used); + } + + { + auto insert_or_assign = [](auto& m, auto key_arg, auto value_arg) { + using M = std::decay_t; + using Key = typename M::key_type; + m.insert_or_assign(ConvertibleTransparent{key_arg}, value_arg); + }; + test_emplace_exception_guarantee(insert_or_assign); + } + + { + auto insert_or_assign_iter = [](auto& m, auto key_arg, auto value_arg) { + using M = std::decay_t; + using Key = typename M::key_type; + m.insert_or_assign(m.begin(), ConvertibleTransparent{key_arg}, value_arg); + }; + test_emplace_exception_guarantee(insert_or_assign_iter); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range.pass.cpp new file mode 100644 index 000000000000000..a2e64431a3c255a --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range.pass.cpp @@ -0,0 +1,109 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template R> +// void insert_range(R&& rg); + +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "MoveOnly.h" +#include "test_macros.h" +#include "test_iterators.h" +#include "min_allocator.h" + +// test constraint container-compatible-range +template +concept CanInsertRange = requires(M m, R&& r) { m.insert_range(std::forward(r)); }; + +using Map = std::flat_map; + +static_assert(CanInsertRange*>>); +static_assert(CanInsertRange*>>); +static_assert(!CanInsertRange>); +static_assert(!CanInsertRange>); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + + { + using P = std::pair; + using M = std::flat_map, KeyContainer, ValueContainer>; + using It = forward_iterator; + M m = {{10, 1}, {8, 2}, {5, 3}, {2, 4}, {1, 5}}; + P ar[] = {{3, 1}, {1, 2}, {4, 3}, {1, 4}, {5, 5}, {9, 6}}; + std::ranges::subrange r = {It(ar), It(ar + 6)}; + static_assert(std::ranges::common_range); + m.insert_range(r); + assert((m == M{{1, 5}, {2, 4}, {3, 1}, {4, 3}, {5, 3}, {8, 2}, {9, 6}, {10, 1}})); + } + { + using P = std::pair; + using M = std::flat_map, KeyContainer, ValueContainer>; + using It = cpp20_input_iterator; + M m = {{8, 1}, {5, 2}, {3, 3}, {2, 4}}; + P ar[] = {{3, 1}, {1, 2}, {4, 3}, {1, 4}, {5, 5}, {9, 6}}; + std::ranges::subrange r = {It(ar), sentinel_wrapper(It(ar + 6))}; + static_assert(!std::ranges::common_range); + m.insert_range(r); + assert((m == M{{1, 2}, {2, 4}, {3, 3}, {4, 3}, {5, 2}, {8, 1}, {9, 6}})); + } + { + // The "uniquing" part uses the comparator, not operator==. 
+ struct ModTen { + bool operator()(int a, int b) const { return (a % 10) < (b % 10); } + }; + using P = std::pair; + using M = std::flat_map; + M m = {{21, 0}, {43, 0}, {15, 0}, {37, 0}}; + P ar[] = {{33, 1}, {18, 1}, {55, 1}, {18, 1}, {42, 1}}; + m.insert_range(ar); + assert((m == M{{21, 0}, {42, 1}, {43, 0}, {15, 0}, {37, 0}, {18, 1}})); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + { + // Items are forwarded correctly from the input range (P2767). + std::pair a[] = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}}; + std::flat_map m; + m.insert_range(a | std::views::as_rvalue); + std::pair expected[] = {{1, 1}, {3, 3}, {4, 4}, {5, 5}}; + assert(std::ranges::equal(m, expected)); + } + { + // The element type of the range doesn't need to be std::pair (P2767). + std::pair pa[] = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}}; + std::deque>> a(pa, pa + 5); + std::flat_map m; + m.insert_range(a); + std::pair expected[] = {{1, 1}, {3, 3}, {4, 4}, {5, 5}}; + assert(std::ranges::equal(m, expected)); + } + { + auto insert_func = [](auto& m, const auto& newValues) { m.insert_range(newValues); }; + test_insert_range_exception_guarantee(insert_func); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range_stability.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range_stability.pass.cpp new file mode 100644 index 000000000000000..fabcb1d216a78a1 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range_stability.pass.cpp @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template R> +// void insert_range(R&& rg); +// +// libc++ uses stable_sort to ensure that flat_map's behavior matches map's, +// in terms of which duplicate items are kept. +// This tests a conforming extension. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + +struct Mod256 { + bool operator()(int x, int y) const { return (x % 256) < (y % 256); } +}; + +int main(int, char**) { + { + std::mt19937 randomness; + std::pair pairs[400]; + for (int i = 0; i < 400; ++i) { + uint16_t r = randomness(); + pairs[i] = {r, r}; + } + + std::map m(pairs, pairs + 200); + std::flat_map fm(std::sorted_unique, m.begin(), m.end()); + assert(std::ranges::equal(fm, m)); + + fm.insert_range(std::views::counted(pairs + 200, 200)); + m.insert(pairs + 200, pairs + 400); + assert(fm.size() == m.size()); + LIBCPP_ASSERT(std::ranges::equal(fm, m)); + } + + { + std::vector> v{{1, 2}, {1, 3}}; + std::flat_map m; + m.insert_range(v); + assert(m.size() == 1); + LIBCPP_ASSERT(m[1] == 2); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_rv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_rv.pass.cpp new file mode 100644 index 000000000000000..9ea7a6a6366664a --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_rv.pass.cpp @@ -0,0 +1,124 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// class flat_map + +// pair insert( value_type&& v); + +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "MoveOnly.h" +#include "min_allocator.h" +#include "test_macros.h" +#include "../helpers.h" + +template +void do_insert_rv_test() { + using M = Container; + using P = Pair; + using R = std::pair; + M m; + std::same_as decltype(auto) r = m.insert(P(2, 2)); + assert(r.second); + assert(r.first == m.begin()); + assert(m.size() == 1); + assert(r.first->first == 2); + assert(r.first->second == 2); + + r = m.insert(P(1, 1)); + assert(r.second); + assert(r.first == m.begin()); + assert(m.size() == 2); + assert(r.first->first == 1); + assert(r.first->second == 1); + + r = m.insert(P(3, 3)); + assert(r.second); + assert(r.first == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r.first->first == 3); + assert(r.first->second == 3); + + r = m.insert(P(3, 3)); + assert(!r.second); + assert(r.first == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r.first->first == 3); + assert(r.first->second == 3); +} + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + using P = std::pair; + using CP = std::pair; + + do_insert_rv_test(); + do_insert_rv_test(); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + using M = std::flat_map; + using R = std::pair; + M m; + R r = m.insert({2, MoveOnly(2)}); + assert(r.second); + assert(r.first == m.begin()); + assert(m.size() == 1); + assert(r.first->first == 2); + assert(r.first->second == 2); + + r = m.insert({1, MoveOnly(1)}); + assert(r.second); + assert(r.first == m.begin()); + 
assert(m.size() == 2); + assert(r.first->first == 1); + assert(r.first->second == 1); + + r = m.insert({3, MoveOnly(3)}); + assert(r.second); + assert(r.first == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r.first->first == 3); + assert(r.first->second == 3); + + r = m.insert({3, MoveOnly(3)}); + assert(!r.second); + assert(r.first == std::ranges::prev(m.end())); + assert(m.size() == 3); + assert(r.first->first == 3); + assert(r.first->second == 3); + } + { + auto insert_func = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + using value_type = typename FlatMap::value_type; + value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg)); + m.insert(std::move(p)); + }; + test_emplace_exception_guarantee(insert_func); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_initializer_list.pass.cpp new file mode 100644 index 000000000000000..08d2caf34987916 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_initializer_list.pass.cpp @@ -0,0 +1,66 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// void insert(initializer_list il); + +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + using V = std::pair; + M m = {{1, 1}, {1, 1.5}, {1, 2}, {3, 1}, {3, 1.5}, {3, 2}}; + m.insert(std::sorted_unique, + { + {0, 1}, + {1, 2}, + {2, 1}, + {4, 1}, + }); + assert(m.size() == 5); + assert(std::distance(m.begin(), m.end()) == 5); + assert(*m.begin() == V(0, 1)); + assert(*std::next(m.begin()) == V(1, 1)); + assert(*std::next(m.begin(), 2) == V(2, 1)); + assert(*std::next(m.begin(), 3) == V(3, 1)); + assert(*std::next(m.begin(), 4) == V(4, 1)); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto insert_func = [](auto& m, const auto& newValues) { + using FlatMap = std::decay_t; + using value_type = typename FlatMap::value_type; + std::initializer_list il = {{newValues[0].first, newValues[0].second}}; + m.insert(std::sorted_unique, il); + }; + test_insert_range_exception_guarantee(insert_func); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_iter_iter.pass.cpp new file mode 100644 index 000000000000000..18a3b571a419949 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_iter_iter.pass.cpp @@ -0,0 +1,86 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// void insert(sorted_unique_t, InputIterator first, InputIterator last); + +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "test_iterators.h" +#include "min_allocator.h" + +// test constraint InputIterator +template +concept CanInsert = requires(M m, Args&&... args) { m.insert(std::forward(args)...); }; + +using Map = std::flat_map; +using Pair = std::pair; + +static_assert(CanInsert); +static_assert(CanInsert, cpp17_input_iterator>); +static_assert(!CanInsert); +static_assert(!CanInsert, cpp20_input_iterator>); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using P = std::pair; + + P ar1[] = { + P(1, 1), + P(2, 1), + P(3, 1), + }; + + P ar2[] = { + P(0, 1), + P(2, 2), + P(4, 1), + }; + + M m; + m.insert( + std::sorted_unique, cpp17_input_iterator(ar1), cpp17_input_iterator(ar1 + sizeof(ar1) / sizeof(ar1[0]))); + assert(m.size() == 3); + M expected{{1, 1}, {2, 1}, {3, 1}}; + assert(m == expected); + + m.insert( + std::sorted_unique, cpp17_input_iterator(ar2), cpp17_input_iterator(ar2 + sizeof(ar2) / sizeof(ar2[0]))); + assert(m.size() == 5); + M expected2{{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}}; + assert(m == expected2); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + auto insert_func = [](auto& m, 
const auto& newValues) { + m.insert(std::sorted_unique, newValues.begin(), newValues.end()); + }; + test_insert_range_exception_guarantee(insert_func); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_transparent.pass.cpp new file mode 100644 index 000000000000000..75cabb70630f325 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_transparent.pass.cpp @@ -0,0 +1,167 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template pair insert(P&& x); +// template iterator insert(const_iterator hint, P&& x); + +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "test_iterators.h" +#include "min_allocator.h" + +// Constraints: is_constructible_v, P> is true. +template +concept CanInsert = requires(M m, Args&&... 
args) { m.insert(std::forward(args)...); }; + +using Map = std::flat_map; +using Iter = Map::const_iterator; + +static_assert(CanInsert&&>); +static_assert(CanInsert&&>); +static_assert(CanInsert&&>); +static_assert(CanInsert&&>); +static_assert(!CanInsert); +static_assert(!CanInsert); + +static int expensive_comparisons = 0; +static int cheap_comparisons = 0; + +struct CompareCounter { + int i_ = 0; + CompareCounter(int i) : i_(i) {} + friend auto operator<=>(const CompareCounter& x, const CompareCounter& y) { + expensive_comparisons += 1; + return x.i_ <=> y.i_; + } + bool operator==(const CompareCounter&) const = default; + friend auto operator<=>(const CompareCounter& x, int y) { + cheap_comparisons += 1; + return x.i_ <=> y; + } +}; + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + const std::pair expected[] = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}}; + { + // insert(P&&) + // Unlike flat_set, here we can't use key_compare to compare value_type versus P, + // so we must eagerly convert to value_type. 
+ M m = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + expensive_comparisons = 0; + cheap_comparisons = 0; + std::same_as> auto p = + m.insert(std::make_pair(3, 3)); // conversion happens first + assert(expensive_comparisons >= 2); + assert(cheap_comparisons == 0); + assert(p == std::make_pair(m.begin() + 2, true)); + assert(std::ranges::equal(m, expected)); + } + { + // insert(const_iterator, P&&) + M m = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + expensive_comparisons = 0; + cheap_comparisons = 0; + std::same_as auto it = m.insert(m.begin(), std::make_pair(3, 3)); + assert(expensive_comparisons >= 2); + assert(cheap_comparisons == 0); + assert(it == m.begin() + 2); + assert(std::ranges::equal(m, expected)); + } + { + // insert(value_type&&) + M m = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + expensive_comparisons = 0; + cheap_comparisons = 0; + std::same_as> auto p = + m.insert(std::make_pair(3, 3)); // conversion happens last + assert(expensive_comparisons >= 2); + assert(cheap_comparisons == 0); + assert(p == std::make_pair(m.begin() + 2, true)); + assert(std::ranges::equal(m, expected)); + } + { + // insert(const_iterator, value_type&&) + M m = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + expensive_comparisons = 0; + cheap_comparisons = 0; + std::same_as auto it = m.insert(m.begin(), std::make_pair(3, 3)); + assert(expensive_comparisons >= 2); + assert(cheap_comparisons == 0); + assert(it == m.begin() + 2); + assert(std::ranges::equal(m, expected)); + } + { + // emplace(Args&&...) 
+ M m = {{1, 1}, {2, 2}, {4, 4}, {5, 5}}; + expensive_comparisons = 0; + cheap_comparisons = 0; + std::same_as> auto p = + m.emplace(std::make_pair(3, 3)); // conversion happens first + assert(expensive_comparisons >= 2); + assert(cheap_comparisons == 0); + assert(p == std::make_pair(m.begin() + 2, true)); + assert(std::ranges::equal(m, expected)); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + // no ambiguity between insert(pos, P&&) and insert(first, last) + using M = std::flat_map; + struct Evil { + operator M::value_type() const; + operator M::const_iterator() const; + }; + std::flat_map m; + ASSERT_SAME_TYPE(decltype(m.insert(Evil())), std::pair); + ASSERT_SAME_TYPE(decltype(m.insert(m.begin(), Evil())), M::iterator); + ASSERT_SAME_TYPE(decltype(m.insert(m.begin(), m.end())), void); + } + { + auto insert_func = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + using tuple_type = std::tuple; + tuple_type t(key_arg, value_arg); + m.insert(t); + }; + test_emplace_exception_guarantee(insert_func); + } + { + auto insert_func_iter = [](auto& m, auto key_arg, auto value_arg) { + using FlatMap = std::decay_t; + using tuple_type = std::tuple; + tuple_type t(key_arg, value_arg); + m.insert(m.begin(), t); + }; + test_emplace_exception_guarantee(insert_func_iter); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/replace.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/replace.pass.cpp new file mode 100644 index 000000000000000..5ca811d76152014 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/replace.pass.cpp @@ -0,0 +1,80 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// void replace(key_container_type&& key_cont, mapped_container_type&& mapped_cont); + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +concept CanReplace = requires(T t, Args&&... args) { t.replace(std::forward(args)...); }; + +using Map = std::flat_map; +static_assert(CanReplace, std::vector>); +static_assert(!CanReplace&, std::vector>); +static_assert(!CanReplace, const std::vector&>); +static_assert(!CanReplace&, const std::vector&>); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + M m = M({1, 2, 3}, {4, 5, 6}); + KeyContainer new_keys = {7, 8}; + ValueContainer new_values = {9, 10}; + auto expected_keys = new_keys; + auto expected_values = new_values; + m.replace(std::move(new_keys), std::move(new_values)); + assert(m.size() == 2); + assert(std::ranges::equal(m.keys(), expected_keys)); + assert(std::ranges::equal(m.values(), expected_values)); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { +#ifndef TEST_HAS_NO_EXCEPTIONS + using KeyContainer = std::vector; + using ValueContainer = ThrowOnMoveContainer; + using M = std::flat_map; + + M m; + m.emplace(1, 1); + m.emplace(2, 2); + try { + KeyContainer new_keys{3, 4}; + ValueContainer new_values{5, 6}; + m.replace(std::move(new_keys), std::move(new_values)); + assert(false); + } catch (int) { + check_invariant(m); + // In libc++, we clear the map + LIBCPP_ASSERT(m.size() == 0); + } 
+#endif + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_exception.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_exception.pass.cpp new file mode 100644 index 000000000000000..f9708aac62c7eec --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_exception.pass.cpp @@ -0,0 +1,78 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// `check_assertion.h` requires Unix headers and regex support. +// REQUIRES: has-unix-headers +// UNSUPPORTED: no-localization +// UNSUPPORTED: no-exceptions + +// + +// void swap(flat_map& y) noexcept; +// friend void swap(flat_map& x, flat_map& y) noexcept + +// Test that std::terminate is called if any exception is thrown during swap + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "../helpers.h" +#include "check_assertion.h" + +template +void test_swap_exception_guarantee([[maybe_unused]] F&& swap_function) { + { + // key swap throws + using KeyContainer = ThrowOnMoveContainer; + using ValueContainer = std::vector; + using M = std::flat_map; + + M m1, m2; + m1.emplace(1, 1); + m1.emplace(2, 2); + m2.emplace(3, 3); + m2.emplace(4, 4); + // swap is noexcept + EXPECT_STD_TERMINATE([&] { swap_function(m1, m2); }); + } + + { + // value swap throws + using KeyContainer = std::vector; + using ValueContainer = ThrowOnMoveContainer; + using M = std::flat_map; + + M m1, m2; + m1.emplace(1, 1); + m1.emplace(2, 2); + m2.emplace(3, 3); + m2.emplace(4, 4); + + // swap is noexcept + 
EXPECT_STD_TERMINATE([&] { swap_function(m1, m2); }); + } +} + +int main(int, char**) { + { + auto swap_func = [](auto& m1, auto& m2) { swap(m1, m2); }; + test_swap_exception_guarantee(swap_func); + } + + { + auto swap_func = [](auto& m1, auto& m2) { m1.swap(m2); }; + test_swap_exception_guarantee(swap_func); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_free.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_free.pass.cpp new file mode 100644 index 000000000000000..98c60c1488cf532 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_free.pass.cpp @@ -0,0 +1,97 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// friend void swap(flat_map& x, flat_map& y) noexcept + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "MoveOnly.h" +#include "min_allocator.h" +#include "test_macros.h" +#include "../helpers.h" + +// test noexcept + +template +concept NoExceptAdlSwap = requires(T t1, T t2) { + { swap(t1, t2) } noexcept; +}; + +static_assert(NoExceptAdlSwap>); + +#ifndef TEST_HAS_NO_EXCEPTIONS +static_assert( + NoExceptAdlSwap, ThrowOnMoveContainer, ThrowOnMoveContainer>>); +#endif + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using V = std::pair; + + { + M m1; + M m2; + M m1_save = m1; + M m2_save = m2; + swap(m1, m2); + assert(m1 == m2_save); + 
assert(m2 == m1_save); + } + { + V ar2[] = {V(5, 5), V(6, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(11, 11), V(12, 12)}; + M m1; + M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0])); + M m1_save = m1; + M m2_save = m2; + swap(m1, m2); + assert(m1 == m2_save); + assert(m2 == m1_save); + } + { + V ar1[] = {V(1, 1), V(2, 2), V(3, 3), V(4, 4)}; + M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0])); + M m2; + M m1_save = m1; + M m2_save = m2; + swap(m1, m2); + assert(m1 == m2_save); + assert(m2 == m1_save); + } + { + V ar1[] = {V(1, 1), V(2, 2), V(3, 3), V(4, 4)}; + V ar2[] = {V(5, 5), V(6, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(11, 11), V(12, 12)}; + M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0])); + M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0])); + M m1_save = m1; + M m2_save = m2; + swap(m1, m2); + assert(m1 == m2_save); + assert(m2 == m1_save); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_member.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_member.pass.cpp new file mode 100644 index 000000000000000..d2d8f5673edeb43 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_member.pass.cpp @@ -0,0 +1,95 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// void swap(flat_map& y) noexcept; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "MoveOnly.h" +#include "min_allocator.h" +#include "test_macros.h" +#include "../helpers.h" + +// test noexcept + +template +concept NoExceptMemberSwap = requires(T t1, T t2) { + { t1.swap(t2) } noexcept; +}; + +static_assert(NoExceptMemberSwap>); +#ifndef TEST_HAS_NO_EXCEPTIONS +static_assert( + NoExceptMemberSwap, ThrowOnMoveContainer, ThrowOnMoveContainer>>); +#endif + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + using V = std::pair; + { + M m1; + M m2; + M m1_save = m1; + M m2_save = m2; + m1.swap(m2); + assert(m1 == m2_save); + assert(m2 == m1_save); + } + { + V ar2[] = {V(5, 5), V(6, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(11, 11), V(12, 12)}; + M m1; + M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0])); + M m1_save = m1; + M m2_save = m2; + m1.swap(m2); + assert(m1 == m2_save); + assert(m2 == m1_save); + } + { + V ar1[] = {V(1, 1), V(2, 2), V(3, 3), V(4, 4)}; + M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0])); + M m2; + M m1_save = m1; + M m2_save = m2; + m1.swap(m2); + assert(m1 == m2_save); + assert(m2 == m1_save); + } + { + V ar1[] = {V(1, 1), V(2, 2), V(3, 3), V(4, 4)}; + V ar2[] = {V(5, 5), V(6, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(11, 11), V(12, 12)}; + M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0])); + M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0])); + M m1_save = m1; + M m2_save = m2; + m1.swap(m2); + assert(m1 == m2_save); + assert(m2 == m1_save); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, 
std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace.pass.cpp new file mode 100644 index 000000000000000..4be2fe1c4333e02 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace.pass.cpp @@ -0,0 +1,246 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// pair try_emplace(const key_type& k, Args&&... args); +// template +// pair try_emplace(key_type&& k, Args&&... args); +// template +// iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args); +// template +// iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args); + +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "../helpers.h" +#include "min_allocator.h" +#include "../../../Emplaceable.h" + +// Constraints: is_constructible_v is true. +template +concept CanTryEmplace = requires(M m, Args&&... 
args) { m.try_emplace(std::forward(args)...); }; + +using Map = std::flat_map; +using Iter = typename Map::const_iterator; +static_assert(!CanTryEmplace); + +static_assert(CanTryEmplace); +static_assert(CanTryEmplace); +static_assert(CanTryEmplace); +static_assert(!CanTryEmplace); +static_assert(!CanTryEmplace); + +static_assert(CanTryEmplace); +static_assert(CanTryEmplace); +static_assert(CanTryEmplace); +static_assert(!CanTryEmplace); +static_assert(!CanTryEmplace); + +static_assert(CanTryEmplace); +static_assert(CanTryEmplace); +static_assert(CanTryEmplace); +static_assert(!CanTryEmplace); +static_assert(!CanTryEmplace); + +static_assert(CanTryEmplace); +static_assert(CanTryEmplace); +static_assert(CanTryEmplace); +static_assert(!CanTryEmplace); +static_assert(!CanTryEmplace); + +template +void test_ck() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + { // pair try_emplace(const key_type& k, Args&&... 
args); + using R = std::pair; + M m; + for (int i = 0; i < 20; i += 2) + m.emplace(i, Moveable(i, (double)i)); + + assert(m.size() == 10); + + Moveable mv1(3, 3.0); + for (int i = 0; i < 20; i += 2) { + std::same_as decltype(auto) r = m.try_emplace(i, std::move(mv1)); + assert(m.size() == 10); + assert(!r.second); // was not inserted + assert(!mv1.moved()); // was not moved from + assert(r.first->first == i); // key + } + + std::same_as decltype(auto) r2 = m.try_emplace(-1, std::move(mv1)); + assert(m.size() == 11); + assert(r2.second); // was inserted + assert(mv1.moved()); // was moved from + assert(r2.first->first == -1); // key + assert(r2.first->second.get() == 3); // value + + Moveable mv2(5, 3.0); + std::same_as decltype(auto) r3 = m.try_emplace(5, std::move(mv2)); + assert(m.size() == 12); + assert(r3.second); // was inserted + assert(mv2.moved()); // was moved from + assert(r3.first->first == 5); // key + assert(r3.first->second.get() == 5); // value + + Moveable mv3(-1, 3.0); + std::same_as decltype(auto) r4 = m.try_emplace(117, std::move(mv2)); + assert(m.size() == 13); + assert(r4.second); // was inserted + assert(mv2.moved()); // was moved from + assert(r4.first->first == 117); // key + assert(r4.first->second.get() == -1); // value + } + + { // iterator try_emplace(const_iterator hint, const key_type& k, Args&&... 
args); + using R = typename M::iterator; + M m; + for (int i = 0; i < 20; i += 2) + m.try_emplace(i, Moveable(i, (double)i)); + assert(m.size() == 10); + typename M::const_iterator it = m.find(2); + + Moveable mv1(3, 3.0); + for (int i = 0; i < 20; i += 2) { + std::same_as decltype(auto) r1 = m.try_emplace(it, i, std::move(mv1)); + assert(m.size() == 10); + assert(!mv1.moved()); // was not moved from + assert(r1->first == i); // key + assert(r1->second.get() == i); // value + } + + std::same_as decltype(auto) r2 = m.try_emplace(it, 3, std::move(mv1)); + assert(m.size() == 11); + assert(mv1.moved()); // was moved from + assert(r2->first == 3); // key + assert(r2->second.get() == 3); // value + } +} + +template +void test_rk() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + { // pair try_emplace(key_type&& k, Args&&... args); + using R = std::pair; + M m; + for (int i = 0; i < 20; i += 2) { + m.emplace(Moveable(i, (double)i), Moveable(i + 1, (double)i + 1)); + } + assert(m.size() == 10); + + Moveable mvkey1(2, 2.0); + Moveable mv1(4, 4.0); + std::same_as decltype(auto) r1 = m.try_emplace(std::move(mvkey1), std::move(mv1)); + assert(m.size() == 10); + assert(!r1.second); // was not inserted + assert(!mv1.moved()); // was not moved from + assert(!mvkey1.moved()); // was not moved from + assert(r1.first->first == mvkey1); // key + + Moveable mvkey2(3, 3.0); + std::same_as decltype(auto) r2 = m.try_emplace(std::move(mvkey2), std::move(mv1)); + assert(m.size() == 11); + assert(r2.second); // was inserted + assert(mv1.moved()); // was moved from + assert(mvkey2.moved()); // was moved from + assert(r2.first->first.get() == 3); // key + assert(r2.first->second.get() == 4); // value + } + + { // iterator try_emplace(const_iterator hint, key_type&& k, Args&&... 
args); + using R = typename M::iterator; + M m; + for (int i = 0; i < 20; i += 2) + m.emplace(Moveable(i, (double)i), Moveable(i + 1, (double)i + 1)); + assert(m.size() == 10); + typename M::const_iterator it = std::next(m.cbegin()); + + Moveable mvkey1(2, 2.0); + Moveable mv1(4, 4.0); + std::same_as decltype(auto) r1 = m.try_emplace(it, std::move(mvkey1), std::move(mv1)); + assert(m.size() == 10); + assert(!mv1.moved()); // was not moved from + assert(!mvkey1.moved()); // was not moved from + assert(r1->first == mvkey1); // key + + Moveable mvkey2(3, 3.0); + std::same_as decltype(auto) r2 = m.try_emplace(it, std::move(mvkey2), std::move(mv1)); + assert(m.size() == 11); + assert(mv1.moved()); // was moved from + assert(mvkey2.moved()); // was moved from + assert(r2->first.get() == 3); // key + assert(r2->second.get() == 4); // value + } +} + +int main(int, char**) { + test_ck, std::vector>(); + test_ck, std::vector>(); + test_ck, MinSequenceContainer>(); + test_ck>, std::vector>>(); + + test_rk, std::vector>(); + test_rk, std::vector>(); + test_rk, MinSequenceContainer>(); + test_rk>, std::vector>>(); + + { + auto try_emplace_ck = [](auto& m, auto key_arg, auto value_arg) { + using M = std::decay_t; + using Key = typename M::key_type; + const Key key{key_arg}; + m.try_emplace(key, value_arg); + }; + test_emplace_exception_guarantee(try_emplace_ck); + } + + { + auto try_emplace_rk = [](auto& m, auto key_arg, auto value_arg) { + using M = std::decay_t; + using Key = typename M::key_type; + m.try_emplace(Key{key_arg}, value_arg); + }; + test_emplace_exception_guarantee(try_emplace_rk); + } + + { + auto try_emplace_iter_ck = [](auto& m, auto key_arg, auto value_arg) { + using M = std::decay_t; + using Key = typename M::key_type; + const Key key{key_arg}; + m.try_emplace(m.begin(), key, value_arg); + }; + test_emplace_exception_guarantee(try_emplace_iter_ck); + } + + { + auto try_emplace_iter_rk = [](auto& m, auto key_arg, auto value_arg) { + using M = std::decay_t; + 
using Key = typename M::key_type; + m.try_emplace(m.begin(), Key{key_arg}, value_arg); + }; + test_emplace_exception_guarantee(try_emplace_iter_rk); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace_transparent.pass.cpp new file mode 100644 index 000000000000000..21fda437809674b --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace_transparent.pass.cpp @@ -0,0 +1,182 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template +// pair try_emplace(K&& k, Args&&... args); +// template +// iterator try_emplace(const_iterator hint, K&& k, Args&&... args); + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "../helpers.h" +#include "min_allocator.h" +#include "../../../Emplaceable.h" + +// Constraints: +// The qualified-id Compare::is_transparent is valid and denotes a type. +// is_constructible_v is true. +// is_constructible_v is true. +// For the first overload, is_convertible_v and is_convertible_v are both false +template +concept CanTryEmplace = requires(M m, Args&&... 
args) { m.try_emplace(std::forward(args)...); }; + +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; + +using TransparentMapIter = typename TransparentMap::iterator; +using TransparentMapConstIter = typename TransparentMap::const_iterator; + +static_assert(!CanTryEmplace); +static_assert(!CanTryEmplace); + +static_assert(CanTryEmplace>); +static_assert(CanTryEmplace, Emplaceable>); +static_assert(CanTryEmplace, int, double>); +static_assert(!CanTryEmplace, const Emplaceable&>); +static_assert(!CanTryEmplace, int>); +static_assert(!CanTryEmplace, Emplaceable>); +static_assert(!CanTryEmplace, Emplaceable>); +static_assert(!CanTryEmplace, int>); +static_assert(!CanTryEmplace); +static_assert(!CanTryEmplace); + +static_assert(CanTryEmplace>); +static_assert(CanTryEmplace, Emplaceable>); +static_assert(CanTryEmplace, int, double>); +static_assert(!CanTryEmplace, const Emplaceable&>); +static_assert(!CanTryEmplace, int>); +static_assert(!CanTryEmplace, Emplaceable>); +static_assert(!CanTryEmplace, Emplaceable>); +static_assert(!CanTryEmplace, int>); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + { // pair try_emplace(K&& k, Args&&... 
args); + using R = std::pair; + M m; + for (int i = 0; i < 20; i += 2) + m.emplace(i, Moveable(i, (double)i)); + + assert(m.size() == 10); + + Moveable mv1(3, 3.0); + for (int i = 0; i < 20; i += 2) { + std::same_as decltype(auto) r = m.try_emplace(ConvertibleTransparent{i}, std::move(mv1)); + assert(m.size() == 10); + assert(!r.second); // was not inserted + assert(!mv1.moved()); // was not moved from + assert(r.first->first == i); // key + } + + std::same_as decltype(auto) r2 = m.try_emplace(ConvertibleTransparent{-1}, std::move(mv1)); + assert(m.size() == 11); + assert(r2.second); // was inserted + assert(mv1.moved()); // was moved from + assert(r2.first->first == -1); // key + assert(r2.first->second.get() == 3); // value + + Moveable mv2(5, 3.0); + std::same_as decltype(auto) r3 = m.try_emplace(ConvertibleTransparent{5}, std::move(mv2)); + assert(m.size() == 12); + assert(r3.second); // was inserted + assert(mv2.moved()); // was moved from + assert(r3.first->first == 5); // key + assert(r3.first->second.get() == 5); // value + + Moveable mv3(-1, 3.0); + std::same_as decltype(auto) r4 = m.try_emplace(ConvertibleTransparent{117}, std::move(mv2)); + assert(m.size() == 13); + assert(r4.second); // was inserted + assert(mv2.moved()); // was moved from + assert(r4.first->first == 117); // key + assert(r4.first->second.get() == -1); // value + } + + { // iterator try_emplace(const_iterator hint, K&& k, Args&&... 
args); + using R = typename M::iterator; + M m; + for (int i = 0; i < 20; i += 2) + m.try_emplace(i, Moveable(i, (double)i)); + assert(m.size() == 10); + typename M::const_iterator it = m.find(2); + + Moveable mv1(3, 3.0); + for (int i = 0; i < 20; i += 2) { + std::same_as decltype(auto) r1 = m.try_emplace(it, ConvertibleTransparent{i}, std::move(mv1)); + assert(m.size() == 10); + assert(!mv1.moved()); // was not moved from + assert(r1->first == i); // key + assert(r1->second.get() == i); // value + } + + std::same_as decltype(auto) r2 = m.try_emplace(it, ConvertibleTransparent{3}, std::move(mv1)); + assert(m.size() == 11); + assert(mv1.moved()); // was moved from + assert(r2->first == 3); // key + assert(r2->second.get() == 3); // value + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto p = m.try_emplace(ConvertibleTransparent{3}, 3); + assert(!p.second); + assert(transparent_used); + } + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto p = m.try_emplace(m.begin(), ConvertibleTransparent{3}, 3); + assert(p->second == 3); + assert(transparent_used); + } + { + auto try_emplace = [](auto& m, auto key_arg, auto value_arg) { + using M = std::decay_t; + using Key = typename M::key_type; + m.try_emplace(ConvertibleTransparent{key_arg}, value_arg); + }; + test_emplace_exception_guarantee(try_emplace); + } + + { + auto try_emplace_iter = [](auto& m, auto key_arg, auto value_arg) { + using M = std::decay_t; + using Key = typename M::key_type; + m.try_emplace(m.begin(), ConvertibleTransparent{key_arg}, value_arg); + }; + 
test_emplace_exception_guarantee(try_emplace_iter); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/comp.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/comp.pass.cpp new file mode 100644 index 000000000000000..d86224952dee453 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/comp.pass.cpp @@ -0,0 +1,96 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// key_compare key_comp() const; +// value_compare value_comp() const; + +#include +#include +#include +#include +#include + +#include "test_macros.h" + +int main(int, char**) { + { + using M = std::flat_map; + using Comp = std::less; // the default + M m = {}; + ASSERT_SAME_TYPE(M::key_compare, Comp); + static_assert(!std::is_same_v); + ASSERT_SAME_TYPE(decltype(m.key_comp()), Comp); + ASSERT_SAME_TYPE(decltype(m.value_comp()), M::value_compare); + Comp kc = m.key_comp(); + assert(kc(1, 2)); + assert(!kc(2, 1)); + auto vc = m.value_comp(); + ASSERT_SAME_TYPE(decltype(vc(std::make_pair(1, 2), std::make_pair(1, 2))), bool); + assert(vc({1, '2'}, {2, '1'})); + assert(!vc({2, '1'}, {1, '2'})); + } + { + using Comp = std::function; + using M = std::flat_map; + Comp comp = std::greater(); + M m({}, comp); + ASSERT_SAME_TYPE(M::key_compare, Comp); + ASSERT_SAME_TYPE(decltype(m.key_comp()), Comp); + ASSERT_SAME_TYPE(decltype(m.value_comp()), M::value_compare); + Comp kc = m.key_comp(); + assert(!kc(1, 2)); + assert(kc(2, 1)); + auto vc = m.value_comp(); + auto a = std::make_pair(1, 2); + 
ASSERT_SAME_TYPE(decltype(vc(a, a)), bool); + static_assert(!noexcept(vc(a, a))); + assert(!vc({1, 2}, {2, 1})); + assert(vc({2, 1}, {1, 2})); + } + { + using Comp = std::less<>; + using M = std::flat_map; + M m = {}; + ASSERT_SAME_TYPE(M::key_compare, Comp); + ASSERT_SAME_TYPE(decltype(m.key_comp()), Comp); + ASSERT_SAME_TYPE(decltype(m.value_comp()), M::value_compare); + Comp kc = m.key_comp(); + assert(kc(1, 2)); + assert(!kc(2, 1)); + auto vc = m.value_comp(); + auto a = std::make_pair(1, 2); + ASSERT_SAME_TYPE(decltype(vc(a, a)), bool); + assert(vc({1, 2}, {2, 1})); + assert(!vc({2, 1}, {1, 2})); + } + { + using Comp = std::function&, const std::vector&)>; + using M = std::flat_map, int, Comp>; + Comp comp = [i = 1](const auto& x, const auto& y) { return x[i] < y[i]; }; + M m({}, comp); + auto vc = m.value_comp(); + static_assert(sizeof(vc) >= sizeof(Comp)); + comp = nullptr; + m = M({}, nullptr); + assert(m.key_comp() == nullptr); + // At this point, m.key_comp() is disengaged. + // But the std::function captured by copy inside `vc` remains valid. + auto a = std::make_pair(std::vector{2, 1, 4}, 42); + auto b = std::make_pair(std::vector{1, 2, 3}, 42); + auto c = std::make_pair(std::vector{0, 3, 2}, 42); + assert(vc(a, b)); + assert(vc(b, c)); + assert(!vc(b, a)); + assert(!vc(c, b)); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/keys_values.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/keys_values.pass.cpp new file mode 100644 index 000000000000000..84d8f8344aaa67e --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/keys_values.pass.cpp @@ -0,0 +1,57 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// const key_container_type& keys() const noexcept +// const mapped_container_type& values() const noexcept + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "test_allocator.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + const M m = {{4, 'a'}, {2, 'b'}, {3, 'c'}}; + std::same_as decltype(auto) keys = m.keys(); + std::same_as decltype(auto) values = m.values(); + + // noexcept + static_assert(noexcept(m.keys())); + static_assert(noexcept(m.values())); + + auto expected_keys = {2, 3, 4}; + auto expected_values = {'b', 'c', 'a'}; + assert(std::ranges::equal(keys, expected_keys)); + assert(std::ranges::equal(values, expected_values)); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains.pass.cpp new file mode 100644 index 000000000000000..208d6138fa68363 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains.pass.cpp @@ -0,0 +1,70 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// bool contains(const key_type& x) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = {{1, 1}, {2, 2}, {4, 4}, {5, 5}, {8, 8}}; + assert(!m.contains(0)); + assert(m.contains(1)); + assert(m.contains(2)); + assert(!m.contains(3)); + assert(m.contains(4)); + assert(m.contains(5)); + assert(!m.contains(6)); + assert(!m.contains(7)); + assert(std::as_const(m).contains(8)); + assert(!std::as_const(m).contains(9)); + m.clear(); + assert(!m.contains(1)); + } + { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = {{1, 0}, {2, 0}, {4, 0}, {5, 0}, {8, 0}}; + assert(!m.contains(0)); + assert(m.contains(1)); + assert(m.contains(2)); + assert(!m.contains(3)); + assert(m.contains(4)); + assert(m.contains(5)); + assert(!m.contains(6)); + assert(!m.contains(7)); + assert(std::as_const(m).contains(8)); + assert(!std::as_const(m).contains(9)); + m.clear(); + assert(!m.contains(1)); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains_transparent.pass.cpp new file mode 100644 index 000000000000000..0493538ab6dadc7 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains_transparent.pass.cpp @@ -0,0 +1,71 @@ 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template bool contains(const K& x) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type. +template +concept CanContains = requires(M m, Transparent k) { m.contains(k); }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +static_assert(CanContains); +static_assert(CanContains); +static_assert(!CanContains); +static_assert(!CanContains); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + M m = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}}; + ASSERT_SAME_TYPE(decltype(m.contains(Transparent{"abc"})), bool); + ASSERT_SAME_TYPE(decltype(std::as_const(m).contains(Transparent{"b"})), bool); + assert(m.contains(Transparent{"alpha"}) == true); + assert(m.contains(Transparent{"beta"}) == true); + assert(m.contains(Transparent{"epsilon"}) == true); + assert(m.contains(Transparent{"eta"}) == true); + assert(m.contains(Transparent{"gamma"}) == true); + assert(m.contains(Transparent{"al"}) == false); + assert(m.contains(Transparent{""}) == false); + assert(m.contains(Transparent{"g"}) == false); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + bool transparent_used = false; + 
TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto b = m.contains(Transparent{3}); + assert(b); + assert(transparent_used); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count.pass.cpp new file mode 100644 index 000000000000000..db675854d5e98b2 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count.pass.cpp @@ -0,0 +1,69 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// size_type count(const key_type& x) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + + { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = {{1, 1}, {2, 2}, {4, 4}, {5, 5}, {8, 8}}; + ASSERT_SAME_TYPE(decltype(m.count(0)), size_t); + assert(m.count(0) == 0); + assert(m.count(1) == 1); + assert(m.count(2) == 1); + assert(m.count(3) == 0); + assert(m.count(4) == 1); + assert(m.count(5) == 1); + assert(m.count(6) == 0); + assert(m.count(7) == 0); + assert(std::as_const(m).count(8) == 1); + assert(std::as_const(m).count(9) == 0); + } + { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = {{1, 0}, {2, 0}, {4, 0}, {5, 0}, {8, 0}}; + ASSERT_SAME_TYPE(decltype(m.count(0)), 
size_t); + assert(m.count(0) == 0); + assert(m.count(1) == 1); + assert(m.count(2) == 1); + assert(m.count(3) == 0); + assert(m.count(4) == 1); + assert(m.count(5) == 1); + assert(m.count(6) == 0); + assert(m.count(7) == 0); + assert(std::as_const(m).count(8) == 1); + assert(std::as_const(m).count(9) == 0); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count_transparent.pass.cpp new file mode 100644 index 000000000000000..cd195ff1fa8b434 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count_transparent.pass.cpp @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template size_type count(const K& x) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type. 
+template +concept CanCount = requires(M m, Transparent k) { m.count(k); }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +static_assert(CanCount); +static_assert(CanCount); +static_assert(!CanCount); +static_assert(!CanCount); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + M m = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}}; + ASSERT_SAME_TYPE(decltype(m.count(Transparent{"abc"})), typename M::size_type); + ASSERT_SAME_TYPE(decltype(std::as_const(m).count(Transparent{"b"})), typename M::size_type); + assert(m.count(Transparent{"alpha"}) == 1); + assert(m.count(Transparent{"beta"}) == 1); + assert(m.count(Transparent{"epsilon"}) == 1); + assert(m.count(Transparent{"eta"}) == 1); + assert(m.count(Transparent{"gamma"}) == 1); + assert(m.count(Transparent{"al"}) == 0); + assert(m.count(Transparent{""}) == 0); + assert(m.count(Transparent{"g"}) == 0); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto n = m.count(Transparent{3}); + assert(n == 1); + assert(transparent_used); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range.pass.cpp new file mode 100644 index 000000000000000..8fa73d2a2eb51df --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range.pass.cpp @@ -0,0 +1,78 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 
with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// pair equal_range(const key_type& k); +// pair equal_range(const key_type& k) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + { + using M = std::flat_map, KeyContainer, ValueContainer>; + using R = std::pair; + using CR = std::pair; + M m = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; + ASSERT_SAME_TYPE(decltype(m.equal_range(0)), R); + ASSERT_SAME_TYPE(decltype(std::as_const(m).equal_range(0)), CR); + auto begin = m.begin(); + assert(m.equal_range(0) == std::pair(begin, begin)); + assert(m.equal_range(1) == std::pair(begin, begin + 1)); + assert(m.equal_range(2) == std::pair(begin + 1, begin + 2)); + assert(m.equal_range(3) == std::pair(begin + 2, begin + 2)); + assert(m.equal_range(4) == std::pair(begin + 2, begin + 3)); + assert(m.equal_range(5) == std::pair(begin + 3, begin + 4)); + assert(m.equal_range(6) == std::pair(begin + 4, begin + 4)); + assert(m.equal_range(7) == std::pair(begin + 4, begin + 4)); + assert(std::as_const(m).equal_range(8) == std::pair(m.cbegin() + 4, m.cbegin() + 5)); + assert(std::as_const(m).equal_range(9) == std::pair(m.cbegin() + 5, m.cbegin() + 5)); + } + + { + using M = std::flat_map, KeyContainer, ValueContainer>; + using R = std::pair; + using CR = std::pair; + M m = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; + ASSERT_SAME_TYPE(decltype(m.equal_range(0)), R); + ASSERT_SAME_TYPE(decltype(std::as_const(m).equal_range(0)), CR); + auto begin = m.begin(); + assert(m.equal_range(0) == std::pair(begin + 
5, begin + 5)); + assert(m.equal_range(1) == std::pair(begin + 4, begin + 5)); + assert(m.equal_range(2) == std::pair(begin + 3, begin + 4)); + assert(m.equal_range(3) == std::pair(begin + 3, begin + 3)); + assert(m.equal_range(4) == std::pair(begin + 2, begin + 3)); + assert(m.equal_range(5) == std::pair(begin + 1, begin + 2)); + assert(m.equal_range(6) == std::pair(begin + 1, begin + 1)); + assert(m.equal_range(7) == std::pair(begin + 1, begin + 1)); + assert(std::as_const(m).equal_range(8) == std::pair(m.cbegin(), m.cbegin() + 1)); + assert(std::as_const(m).equal_range(9) == std::pair(m.cbegin(), m.cbegin())); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range_transparent.pass.cpp new file mode 100644 index 000000000000000..0198f433bdc4f1d --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range_transparent.pass.cpp @@ -0,0 +1,100 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template pair equal_range(const K& x); +// template pair equal_range(const K& x) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type. +template +concept CanEqualRange = requires(M m, Transparent k) { m.equal_range(k); }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +static_assert(CanEqualRange); +static_assert(CanEqualRange); +static_assert(!CanEqualRange); +static_assert(!CanEqualRange); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + using R = std::pair; + using CR = std::pair; + M m = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}}; + const auto& cm = m; + ASSERT_SAME_TYPE(decltype(m.equal_range(Transparent{"abc"})), R); + ASSERT_SAME_TYPE(decltype(std::as_const(m).equal_range(Transparent{"b"})), CR); + + auto test_found = [&](auto&& map, const std::string& expected_key, int expected_value) { + auto [first, last] = map.equal_range(Transparent{expected_key}); + assert(last - first == 1); + auto [key, value] = *first; + assert(key == expected_key); + assert(value == expected_value); + }; + + auto test_not_found = [&](auto&& map, const std::string& expected_key, long expected_offset) { + auto [first, last] = map.equal_range(Transparent{expected_key}); + assert(first == last); + assert(first - m.begin() == expected_offset); + }; + + test_found(m, "alpha", 1); + test_found(m, "beta", 2); + test_found(m, "epsilon", 3); + test_found(m, "eta", 4); + test_found(m, "gamma", 5); + 
test_found(cm, "alpha", 1); + test_found(cm, "beta", 2); + test_found(cm, "epsilon", 3); + test_found(cm, "eta", 4); + test_found(cm, "gamma", 5); + + test_not_found(m, "charlie", 2); + test_not_found(m, "aaa", 0); + test_not_found(m, "zzz", 5); + test_not_found(cm, "charlie", 2); + test_not_found(cm, "aaa", 0); + test_not_found(cm, "zzz", 5); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto p = m.equal_range(Transparent{3}); + assert(p.first != p.second); + assert(transparent_used); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find.pass.cpp new file mode 100644 index 000000000000000..9fae407c7d8f7c5 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find.pass.cpp @@ -0,0 +1,55 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// iterator find(const key_type& k); +// const_iterator find(const key_type& k) const; + +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map, KeyContainer, ValueContainer>; + + M m = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; + ASSERT_SAME_TYPE(decltype(m.find(0)), typename M::iterator); + ASSERT_SAME_TYPE(decltype(std::as_const(m).find(0)), typename M::const_iterator); + assert(m.find(0) == m.end()); + assert(m.find(1) == m.begin()); + assert(m.find(2) == m.begin() + 1); + assert(m.find(3) == m.end()); + assert(m.find(4) == m.begin() + 2); + assert(m.find(5) == m.begin() + 3); + assert(m.find(6) == m.end()); + assert(m.find(7) == m.end()); + assert(std::as_const(m).find(8) == m.begin() + 4); + assert(std::as_const(m).find(9) == m.end()); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find_transparent.pass.cpp new file mode 100644 index 000000000000000..291577a89fc8f4d --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find_transparent.pass.cpp @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template iterator find(const K& x); +// template const_iterator find(const K& x) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type. +template +concept CanFind = requires(M m, Transparent k) { m.find(k); }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +static_assert(CanFind); +static_assert(CanFind); +static_assert(!CanFind); +static_assert(!CanFind); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + M m = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}}; + const auto& cm = m; + ASSERT_SAME_TYPE(decltype(m.find(Transparent{"abc"})), typename M::iterator); + ASSERT_SAME_TYPE(decltype(std::as_const(m).find(Transparent{"b"})), typename M::const_iterator); + + auto test_find = [&](auto&& map, const std::string& expected_key, long expected_offset) { + auto iter = map.find(Transparent{expected_key}); + assert(iter - map.begin() == expected_offset); + }; + + test_find(m, "alpha", 0); + test_find(m, "beta", 1); + test_find(m, "epsilon", 2); + test_find(m, "eta", 3); + test_find(m, "gamma", 4); + test_find(m, "charlie", 5); + test_find(m, "aaa", 5); + test_find(m, "zzz", 5); + test_find(cm, "alpha", 0); + test_find(cm, "beta", 1); + test_find(cm, "epsilon", 2); + test_find(cm, "eta", 3); + test_find(cm, "gamma", 4); + test_find(cm, "charlie", 5); + test_find(cm, "aaa", 5); + test_find(cm, "zzz", 5); +} + +int main(int, char**) { + 
test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto it = m.find(Transparent{3}); + assert(it != m.end()); + assert(transparent_used); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound.pass.cpp new file mode 100644 index 000000000000000..b5491f3b226746c --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound.pass.cpp @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// iterator lower_bound(const key_type& k); +// const_iterator lower_bound(const key_type& k) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; + ASSERT_SAME_TYPE(decltype(m.lower_bound(0)), typename M::iterator); + ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(0)), typename M::const_iterator); + assert(m.lower_bound(0) == m.begin()); + assert(m.lower_bound(1) == m.begin()); + assert(m.lower_bound(2) == m.begin() + 1); + assert(m.lower_bound(3) == m.begin() + 2); + assert(m.lower_bound(4) == m.begin() + 2); + assert(m.lower_bound(5) == m.begin() + 3); + assert(m.lower_bound(6) == m.begin() + 4); + assert(m.lower_bound(7) == m.begin() + 4); + assert(std::as_const(m).lower_bound(8) == m.begin() + 4); + assert(std::as_const(m).lower_bound(9) == m.end()); + } + { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; + ASSERT_SAME_TYPE(decltype(m.lower_bound(0)), typename M::iterator); + ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(0)), typename M::const_iterator); + assert(m.lower_bound(0) == m.end()); + assert(m.lower_bound(1) == m.begin() + 4); + assert(m.lower_bound(2) == m.begin() + 3); + assert(m.lower_bound(3) == m.begin() + 3); + assert(m.lower_bound(4) == m.begin() + 2); + assert(m.lower_bound(5) == m.begin() + 1); + assert(m.lower_bound(6) == m.begin() + 1); + assert(m.lower_bound(7) == m.begin() + 1); + 
assert(std::as_const(m).lower_bound(8) == m.begin()); + assert(std::as_const(m).lower_bound(9) == m.begin()); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound_transparent.pass.cpp new file mode 100644 index 000000000000000..6a923c197e91eaa --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound_transparent.pass.cpp @@ -0,0 +1,95 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template iterator lower_bound(const K& x); +// template const_iterator lower_bound(const K& x) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type. 
+template +concept CanLowerBound = requires(M m, Transparent k) { m.lower_bound(k); }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +static_assert(CanLowerBound); +static_assert(CanLowerBound); +static_assert(!CanLowerBound); +static_assert(!CanLowerBound); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + M m = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}}; + const auto& cm = m; + ASSERT_SAME_TYPE(decltype(m.lower_bound(Transparent{"abc"})), typename M::iterator); + ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(Transparent{"b"})), typename M::const_iterator); + + auto test_lower_bound = [&](auto&& map, const std::string& expected_key, long expected_offset) { + auto iter = map.lower_bound(Transparent{expected_key}); + assert(iter - map.begin() == expected_offset); + }; + + test_lower_bound(m, "abc", 0); + test_lower_bound(m, "alpha", 0); + test_lower_bound(m, "beta", 1); + test_lower_bound(m, "bets", 2); + test_lower_bound(m, "charlie", 2); + test_lower_bound(m, "echo", 2); + test_lower_bound(m, "epsilon", 2); + test_lower_bound(m, "eta", 3); + test_lower_bound(m, "gamma", 4); + test_lower_bound(m, "golf", 5); + test_lower_bound(m, "zzz", 5); + + test_lower_bound(cm, "abc", 0); + test_lower_bound(cm, "alpha", 0); + test_lower_bound(cm, "beta", 1); + test_lower_bound(cm, "bets", 2); + test_lower_bound(cm, "charlie", 2); + test_lower_bound(cm, "echo", 2); + test_lower_bound(cm, "epsilon", 2); + test_lower_bound(cm, "eta", 3); + test_lower_bound(cm, "gamma", 4); + test_lower_bound(cm, "golf", 5); + test_lower_bound(cm, "zzz", 5); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, 
{2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto it = m.lower_bound(Transparent{3}); + assert(it != m.end()); + assert(transparent_used); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound.pass.cpp new file mode 100644 index 000000000000000..775e53286d6295d --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound.pass.cpp @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// iterator upper_bound(const key_type& k); +// const_iterator upper_bound(const key_type& k) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; + ASSERT_SAME_TYPE(decltype(m.upper_bound(0)), typename M::iterator); + ASSERT_SAME_TYPE(decltype(std::as_const(m).upper_bound(0)), typename M::const_iterator); + assert(m.upper_bound(0) == m.begin()); + assert(m.upper_bound(1) == m.begin() + 1); + assert(m.upper_bound(2) == m.begin() + 2); + assert(m.upper_bound(3) == m.begin() + 2); + assert(m.upper_bound(4) == m.begin() + 3); + assert(m.upper_bound(5) == m.begin() + 4); + assert(m.upper_bound(6) == m.begin() + 4); + 
assert(std::as_const(m).upper_bound(7) == m.begin() + 4); + assert(std::as_const(m).upper_bound(8) == m.end()); + assert(std::as_const(m).upper_bound(9) == m.end()); + } + + { + using M = std::flat_map, KeyContainer, ValueContainer>; + M m = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; + ASSERT_SAME_TYPE(decltype(m.upper_bound(0)), typename M::iterator); + ASSERT_SAME_TYPE(decltype(std::as_const(m).upper_bound(0)), typename M::const_iterator); + assert(m.upper_bound(0) == m.end()); + assert(m.upper_bound(1) == m.end()); + assert(m.upper_bound(2) == m.begin() + 4); + assert(m.upper_bound(3) == m.begin() + 3); + assert(m.upper_bound(4) == m.begin() + 3); + assert(m.upper_bound(5) == m.begin() + 2); + assert(m.upper_bound(6) == m.begin() + 1); + assert(m.upper_bound(7) == m.begin() + 1); + assert(std::as_const(m).upper_bound(8) == m.begin() + 1); + assert(std::as_const(m).upper_bound(9) == m.begin()); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound_transparent.pass.cpp new file mode 100644 index 000000000000000..4e83f920835dffc --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound_transparent.pass.cpp @@ -0,0 +1,94 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// template iterator upper_bound(const K& x); +// template const_iterator upper_bound(const K& x) const; + +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "../helpers.h" +#include "test_macros.h" +#include "min_allocator.h" + +// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type. +template +concept CanUpperBound = requires(M m, Transparent k) { m.upper_bound(k); }; +using TransparentMap = std::flat_map; +using NonTransparentMap = std::flat_map; +static_assert(CanUpperBound); +static_assert(CanUpperBound); +static_assert(!CanUpperBound); +static_assert(!CanUpperBound); + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + using M = std::flat_map; + + M m = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}}; + const auto& cm = m; + ASSERT_SAME_TYPE(decltype(m.lower_bound(Transparent{"abc"})), typename M::iterator); + ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(Transparent{"b"})), typename M::const_iterator); + + auto test_upper_bound = [&](auto&& map, const std::string& expected_key, long expected_offset) { + auto iter = map.upper_bound(Transparent{expected_key}); + assert(iter - map.begin() == expected_offset); + }; + + test_upper_bound(m, "abc", 0); + test_upper_bound(m, "alpha", 1); + test_upper_bound(m, "beta", 2); + test_upper_bound(m, "bets", 2); + test_upper_bound(m, "charlie", 2); + test_upper_bound(m, "echo", 2); + test_upper_bound(m, "epsilon", 3); + test_upper_bound(m, "eta", 4); + test_upper_bound(m, "gamma", 5); + test_upper_bound(m, "golf", 5); + test_upper_bound(m, "zzz", 5); + + test_upper_bound(cm, "abc", 0); + test_upper_bound(cm, "alpha", 1); + 
test_upper_bound(cm, "beta", 2); + test_upper_bound(cm, "bets", 2); + test_upper_bound(cm, "charlie", 2); + test_upper_bound(cm, "echo", 2); + test_upper_bound(cm, "epsilon", 3); + test_upper_bound(cm, "eta", 4); + test_upper_bound(cm, "gamma", 5); + test_upper_bound(cm, "golf", 5); + test_upper_bound(cm, "zzz", 5); +} + +int main(int, char**) { + test, std::vector>(); + test, std::vector>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + { + bool transparent_used = false; + TransparentComparator c(transparent_used); + std::flat_map m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c); + assert(!transparent_used); + auto it = m.upper_bound(Transparent{2}); + assert(it != m.end()); + assert(transparent_used); + } + + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/helpers.h b/libcxx/test/std/containers/container.adaptors/flat.map/helpers.h new file mode 100644 index 000000000000000..8dbb85a6c0acf13 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/helpers.h @@ -0,0 +1,394 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SUPPORT_FLAT_MAP_HELPERS_H +#define SUPPORT_FLAT_MAP_HELPERS_H + +#include +#include +#include +#include +#include + +#include "test_allocator.h" +#include "test_macros.h" + +template +void check_invariant(const std::flat_map& m) { + assert(m.keys().size() == m.values().size()); + const auto& keys = m.keys(); + assert(std::is_sorted(keys.begin(), keys.end(), m.key_comp())); + auto key_equal = [&](const auto& x, const auto& y) { + const auto& c = m.key_comp(); + return !c(x, y) && !c(y, x); + }; + assert(std::adjacent_find(keys.begin(), keys.end(), key_equal) == keys.end()); +} + +struct StartsWith { + explicit StartsWith(char ch) : lower_(1, ch), upper_(1, ch + 1) {} + StartsWith(const StartsWith&) = delete; + void operator=(const StartsWith&) = delete; + struct Less { + using is_transparent = void; + bool operator()(const std::string& a, const std::string& b) const { return a < b; } + bool operator()(const StartsWith& a, const std::string& b) const { return a.upper_ <= b; } + bool operator()(const std::string& a, const StartsWith& b) const { return a < b.lower_; } + bool operator()(const StartsWith&, const StartsWith&) const { + assert(false); // should not be called + return false; + } + }; + +private: + std::string lower_; + std::string upper_; +}; + +template +struct CopyOnlyVector : std::vector { + using std::vector::vector; + + CopyOnlyVector(const CopyOnlyVector&) = default; + CopyOnlyVector(CopyOnlyVector&& other) : CopyOnlyVector(other) {} + CopyOnlyVector(CopyOnlyVector&& other, std::vector::allocator_type alloc) : CopyOnlyVector(other, alloc) {} + + CopyOnlyVector& operator=(const CopyOnlyVector&) = default; + CopyOnlyVector& operator=(CopyOnlyVector& other) { return this->operator=(other); } +}; + +template +struct Transparent { + T t; + + operator T() const + requires ConvertibleToT + { + return t; 
+ } +}; + +template +using ConvertibleTransparent = Transparent; + +template +using NonConvertibleTransparent = Transparent; + +struct TransparentComparator { + using is_transparent = void; + + bool* transparent_used = nullptr; + TransparentComparator() = default; + TransparentComparator(bool& used) : transparent_used(&used) {} + + template + bool operator()(const T& t, const Transparent& transparent) const { + if (transparent_used != nullptr) { + *transparent_used = true; + } + return t < transparent.t; + } + + template + bool operator()(const Transparent& transparent, const T& t) const { + if (transparent_used != nullptr) { + *transparent_used = true; + } + return transparent.t < t; + } + + template + bool operator()(const T& t1, const T& t2) const { + return t1 < t2; + } +}; + +struct NonTransparentComparator { + template + bool operator()(const T&, const Transparent&) const; + + template + bool operator()(const Transparent&, const T&) const; + + template + bool operator()(const T&, const T&) const; +}; + +struct NoDefaultCtr { + NoDefaultCtr() = delete; +}; + +#ifndef TEST_HAS_NO_EXCEPTIONS +template +struct EmplaceUnsafeContainer : std::vector { + using std::vector::vector; + + template + auto emplace(Args&&... args) -> decltype(std::declval>().emplace(std::forward(args)...)) { + if (this->size() > 1) { + auto it1 = this->begin(); + auto it2 = it1 + 1; + // messing up the container + std::iter_swap(it1, it2); + } + + throw 42; + } + + template + auto insert(Args&&... args) -> decltype(std::declval>().insert(std::forward(args)...)) { + if (this->size() > 1) { + auto it1 = this->begin(); + auto it2 = it1 + 1; + // messing up the container + std::iter_swap(it1, it2); + } + + throw 42; + } +}; + +template +struct ThrowOnEraseContainer : std::vector { + using std::vector::vector; + + template + auto erase(Args&&... 
args) -> decltype(std::declval>().erase(std::forward(args)...)) { + throw 42; + } +}; + +template +struct ThrowOnMoveContainer : std::vector { + using std::vector::vector; + + ThrowOnMoveContainer(ThrowOnMoveContainer&&) { throw 42; } + + ThrowOnMoveContainer& operator=(ThrowOnMoveContainer&&) { throw 42; } +}; + +#endif + +template +void test_emplace_exception_guarantee([[maybe_unused]] F&& emplace_function) { +#ifndef TEST_HAS_NO_EXCEPTIONS + using C = TransparentComparator; + { + // Throw on emplace the key, and underlying has strong exception guarantee + using KeyContainer = std::vector>; + using M = std::flat_map; + + LIBCPP_STATIC_ASSERT(std::__container_traits::__emplacement_has_strong_exception_safety_guarantee); + + test_allocator_statistics stats; + + KeyContainer a({1, 2, 3, 4}, test_allocator{&stats}); + std::vector b = {5, 6, 7, 8}; + [[maybe_unused]] auto expected_keys = a; + [[maybe_unused]] auto expected_values = b; + M m(std::sorted_unique, std::move(a), std::move(b)); + + stats.throw_after = 1; + try { + emplace_function(m, 0, 0); + assert(false); + } catch (const std::bad_alloc&) { + check_invariant(m); + // In libc++, the flat_map is unchanged + LIBCPP_ASSERT(m.size() == 4); + LIBCPP_ASSERT(m.keys() == expected_keys); + LIBCPP_ASSERT(m.values() == expected_values); + } + } + { + // Throw on emplace the key, and underlying has no strong exception guarantee + using KeyContainer = EmplaceUnsafeContainer; + using M = std::flat_map; + + LIBCPP_STATIC_ASSERT(!std::__container_traits::__emplacement_has_strong_exception_safety_guarantee); + KeyContainer a = {1, 2, 3, 4}; + std::vector b = {5, 6, 7, 8}; + M m(std::sorted_unique, std::move(a), std::move(b)); + try { + emplace_function(m, 0, 0); + assert(false); + } catch (int) { + check_invariant(m); + // In libc++, the flat_map is cleared + LIBCPP_ASSERT(m.size() == 0); + } + } + { + // Throw on emplace the value, and underlying has strong exception guarantee + using ValueContainer = std::vector>; + ; + 
using M = std::flat_map, ValueContainer>; + + LIBCPP_STATIC_ASSERT(std::__container_traits::__emplacement_has_strong_exception_safety_guarantee); + + std::vector a = {1, 2, 3, 4}; + test_allocator_statistics stats; + ValueContainer b({1, 2, 3, 4}, test_allocator{&stats}); + + [[maybe_unused]] auto expected_keys = a; + [[maybe_unused]] auto expected_values = b; + M m(std::sorted_unique, std::move(a), std::move(b)); + + stats.throw_after = 1; + try { + emplace_function(m, 0, 0); + assert(false); + } catch (const std::bad_alloc&) { + check_invariant(m); + // In libc++, the emplaced key is erased and the flat_map is unchanged + LIBCPP_ASSERT(m.size() == 4); + LIBCPP_ASSERT(m.keys() == expected_keys); + LIBCPP_ASSERT(m.values() == expected_values); + } + } + { + // Throw on emplace the value, and underlying has no strong exception guarantee + using ValueContainer = EmplaceUnsafeContainer; + using M = std::flat_map, ValueContainer>; + + LIBCPP_STATIC_ASSERT(!std::__container_traits::__emplacement_has_strong_exception_safety_guarantee); + std::vector a = {1, 2, 3, 4}; + ValueContainer b = {1, 2, 3, 4}; + + M m(std::sorted_unique, std::move(a), std::move(b)); + + try { + emplace_function(m, 0, 0); + assert(false); + } catch (int) { + check_invariant(m); + // In libc++, the flat_map is cleared + LIBCPP_ASSERT(m.size() == 0); + } + } + { + // Throw on emplace the value, then throw again on erasing the key + using KeyContainer = ThrowOnEraseContainer; + using ValueContainer = std::vector>; + using M = std::flat_map; + + LIBCPP_STATIC_ASSERT(std::__container_traits::__emplacement_has_strong_exception_safety_guarantee); + + KeyContainer a = {1, 2, 3, 4}; + test_allocator_statistics stats; + ValueContainer b({1, 2, 3, 4}, test_allocator{&stats}); + + M m(std::sorted_unique, std::move(a), std::move(b)); + stats.throw_after = 1; + try { + emplace_function(m, 0, 0); + assert(false); + } catch (const std::bad_alloc&) { + check_invariant(m); + // In libc++, we try to erase the key 
after value emplacement failure. + // and after erasure failure, we clear the flat_map + LIBCPP_ASSERT(m.size() == 0); + } + } +#endif +} + +template +void test_insert_range_exception_guarantee([[maybe_unused]] F&& insert_function) { +#ifndef TEST_HAS_NO_EXCEPTIONS + using KeyContainer = EmplaceUnsafeContainer; + using ValueContainer = std::vector; + using M = std::flat_map; + test_allocator_statistics stats; + KeyContainer a{1, 2, 3, 4}; + ValueContainer b{1, 2, 3, 4}; + M m(std::sorted_unique, std::move(a), std::move(b)); + + std::vector> newValues = {{0, 0}, {1, 1}, {5, 5}, {6, 6}, {7, 7}, {8, 8}}; + stats.throw_after = 1; + try { + insert_function(m, newValues); + assert(false); + } catch (int) { + check_invariant(m); + // In libc++, we clear if anything goes wrong when inserting a range + LIBCPP_ASSERT(m.size() == 0); + } +#endif +} + +template +void test_erase_exception_guarantee([[maybe_unused]] F&& erase_function) { +#ifndef TEST_HAS_NO_EXCEPTIONS + { + // key erase throws + using KeyContainer = ThrowOnEraseContainer; + using ValueContainer = std::vector; + using M = std::flat_map; + + KeyContainer a{1, 2, 3, 4}; + ValueContainer b{1, 2, 3, 4}; + M m(std::sorted_unique, std::move(a), std::move(b)); + try { + erase_function(m, 3); + assert(false); + } catch (int) { + check_invariant(m); + // In libc++, we clear if anything goes wrong when erasing + LIBCPP_ASSERT(m.size() == 0); + } + } + { + // key erase throws + using KeyContainer = std::vector; + using ValueContainer = ThrowOnEraseContainer; + using M = std::flat_map; + + KeyContainer a{1, 2, 3, 4}; + ValueContainer b{1, 2, 3, 4}; + M m(std::sorted_unique, std::move(a), std::move(b)); + try { + erase_function(m, 3); + assert(false); + } catch (int) { + check_invariant(m); + // In libc++, we clear if anything goes wrong when erasing + LIBCPP_ASSERT(m.size() == 0); + } + } +#endif +} +class Moveable { + int int_; + double double_; + +public: + Moveable() : int_(0), double_(0) {} + Moveable(int i, double d) : 
int_(i), double_(d) {} + Moveable(Moveable&& x) : int_(x.int_), double_(x.double_) { + x.int_ = -1; + x.double_ = -1; + } + Moveable& operator=(Moveable&& x) { + int_ = x.int_; + x.int_ = -1; + double_ = x.double_; + x.double_ = -1; + return *this; + } + + Moveable(const Moveable&) = delete; + Moveable& operator=(const Moveable&) = delete; + bool operator==(const Moveable& x) const { return int_ == x.int_ && double_ == x.double_; } + bool operator<(const Moveable& x) const { return int_ < x.int_ || (int_ == x.int_ && double_ < x.double_); } + + int get() const { return int_; } + bool moved() const { return int_ == -1; } +}; + +#endif // SUPPORT_FLAT_MAP_HELPERS_H diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp new file mode 100644 index 000000000000000..81c590ba73a1579 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// Check that std::flat_map and its iterators can be instantiated with an incomplete +// type. 
+ +#include + +struct A { + using Map = std::flat_map; + int data; + Map m; + Map::iterator it; + Map::const_iterator cit; +}; + +// Implement the operator< required in order to instantiate flat_map +bool operator<(A const& L, A const& R) { return L.data < R.data; } + +int main(int, char**) { + A a; + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/op_compare.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/op_compare.pass.cpp new file mode 100644 index 000000000000000..fffe71158070407 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/op_compare.pass.cpp @@ -0,0 +1,118 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// + +// friend bool operator==(const flat_map& x, const flat_map& y); +// friend synth-three-way-result +// operator<=>(const flat_map& x, const flat_map& y); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MinSequenceContainer.h" +#include "test_macros.h" +#include "min_allocator.h" +#include "test_allocator.h" +#include "test_comparisons.h" +#include "test_container_comparisons.h" + +template +void test() { + using Key = typename KeyContainer::value_type; + using Value = typename ValueContainer::value_type; + + { + using C = std::flat_map; + C s1 = {{1, 1}}; + C s2 = {{2, 0}}; // {{1,1}} versus {{2,0}} + ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::strong_ordering); + AssertComparisonsReturnBool(); + assert(testComparisons(s1, s2, false, true)); + s2 = {{1, 1}}; // {{1,1}} versus {{1,1}} + assert(testComparisons(s1, s2, true, false)); + s2 = {{1, 1}, 
{2, 0}}; // {{1,1}} versus {{1,1},{2,0}} + assert(testComparisons(s1, s2, false, true)); + s1 = {{0, 0}, {1, 1}, {2, 2}}; // {{0,0},{1,1},{2,2}} versus {{1,1},{2,0}} + assert(testComparisons(s1, s2, false, true)); + s2 = {{0, 0}, {1, 1}, {2, 3}}; // {{0,0},{1,1},{2,2}} versus {{0,0},{1,1},{2,3}} + assert(testComparisons(s1, s2, false, true)); + } + { + // Comparisons use value_type's native operators, not the comparator + using C = std::flat_map>; + C s1 = {{1, 1}}; + C s2 = {{2, 0}}; // {{1,1}} versus {{2,0}} + ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::strong_ordering); + AssertComparisonsReturnBool(); + assert(testComparisons(s1, s2, false, true)); + s2 = {{1, 1}}; // {{1,1}} versus {{1,1}} + assert(testComparisons(s1, s2, true, false)); + s2 = {{1, 1}, {2, 0}}; // {{1,1}} versus {{2,0},{1,1}} + assert(testComparisons(s1, s2, false, true)); + s1 = {{0, 0}, {1, 1}, {2, 2}}; // {{2,2},{1,1},{0,0}} versus {2,0},{1,1}} + assert(testComparisons(s1, s2, false, false)); + s2 = {{0, 0}, {1, 1}, {2, 3}}; // {{2,2},{1,1},{0,0}} versus {{2,3},{1,1},{0,0}} + assert(testComparisons(s1, s2, false, true)); + } +} + +int main(int, char**) { + test, std::vector>(); + test, std::deque>(); + test, MinSequenceContainer>(); + test>, std::vector>>(); + test>, std::vector>>(); + + { + using C = std::flat_map; + C s1 = {{1, 1}}; + C s2 = C(std::sorted_unique, {{std::numeric_limits::quiet_NaN(), 2}}); + ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering); + AssertComparisonsReturnBool(); + assert(testComparisonsComplete(s1, s2, false, false, false)); + } + { + using C = std::flat_map; + C s1 = {{1, 1}}; + C s2 = C(std::sorted_unique, {{2, std::numeric_limits::quiet_NaN()}}); + ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering); + AssertComparisonsReturnBool(); + assert(testComparisonsComplete(s1, s2, false, true, false)); + s2 = C(std::sorted_unique, {{1, std::numeric_limits::quiet_NaN()}}); + assert(testComparisonsComplete(s1, s2, false, false, false)); + } + { + // 
Comparisons use value_type's native operators, not the comparator + struct StrongComp { + bool operator()(double a, double b) const { return std::strong_order(a, b) < 0; } + }; + using C = std::flat_map; + C s1 = {{1, 1}}; + C s2 = {{std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()}}; + ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering); + AssertComparisonsReturnBool(); + assert(testComparisonsComplete(s1, s2, false, false, false)); + s1 = {{{1, 1}, {std::numeric_limits::quiet_NaN(), 1}}}; + s2 = {{{std::numeric_limits::quiet_NaN(), 1}, {1, 1}}}; + assert(std::lexicographical_compare_three_way( + s1.keys().begin(), s1.keys().end(), s2.keys().begin(), s2.keys().end(), std::strong_order) == + std::strong_ordering::equal); + assert(s1 != s2); + assert((s1 <=> s2) == std::partial_ordering::unordered); + } + return 0; +} diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/types.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/types.compile.pass.cpp new file mode 100644 index 000000000000000..ea9d4d7fca67f00 --- /dev/null +++ b/libcxx/test/std/containers/container.adaptors/flat.map/types.compile.pass.cpp @@ -0,0 +1,133 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 + +// using key_type = Key; +// using mapped_type = T; +// using value_type = pair; +// using key_compare = Compare; +// using reference = pair; +// using const_reference = pair; +// using size_type = size_t; +// using difference_type = ptrdiff_t; +// using iterator = implementation-defined; // see [container.requirements] +// using const_iterator = implementation-defined; // see [container.requirements] +// using reverse_iterator = std::reverse_iterator; +// using const_reverse_iterator = std::reverse_iterator; +// using key_container_type = KeyContainer; +// using mapped_container_type = MappedContainer; + +// class value_compare; + +// struct containers { +// key_container_type keys; +// mapped_container_type values; +// }; + +#include +#include +#include +#include +#include +#include +#include +#include "min_allocator.h" + +void test() { + { + using M = std::flat_map; + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(requires { typename M::iterator; }); + static_assert(requires { typename M::const_iterator; }); + static_assert(std::is_same_v>); + static_assert( + std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(requires { typename M::value_compare; }); + static_assert(requires { typename M::containers; }); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + } + + { + struct A {}; + struct Compare { + bool operator()(const std::string&, const std::string&) const; + }; + using M = std::flat_map, std::deque>; + static_assert(std::is_same_v); + 
static_assert(std::is_same_v); + static_assert(std::is_same_v>); + static_assert(std::is_same_v); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(requires { typename M::iterator; }); + static_assert(requires { typename M::const_iterator; }); + static_assert(std::is_same_v>); + static_assert( + std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(requires { typename M::value_compare; }); + static_assert(requires { typename M::containers; }); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + } + { + using C = std::flat_map; + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(!std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + } + { + using C = std::flat_map, std::deque>>; + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(!std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(std::random_access_iterator); + static_assert(std::is_same_v>); + static_assert(std::is_same_v>); + // size_type is invariably size_t + static_assert(std::is_same_v); + static_assert(std::is_same_v); + static_assert(std::is_same_v>>); + 
static_assert(std::is_same_v>); + } +} diff --git a/libcxx/test/support/MinSequenceContainer.h b/libcxx/test/support/MinSequenceContainer.h new file mode 100644 index 000000000000000..d0e29ae40c400d3 --- /dev/null +++ b/libcxx/test/support/MinSequenceContainer.h @@ -0,0 +1,83 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SUPPORT_MIN_SEQUENCE_CONTAINER_H +#define SUPPORT_MIN_SEQUENCE_CONTAINER_H + +#include +#include + +#include "test_iterators.h" + +template , class ConstIterator = random_access_iterator> +struct MinSequenceContainer { + using value_type = T; + using difference_type = int; + using size_type = unsigned int; + using iterator = Iterator; + using const_iterator = ConstIterator; + + explicit MinSequenceContainer() = default; + template + explicit MinSequenceContainer(It first, It last) : data_(first, last) {} + MinSequenceContainer(std::initializer_list il) : data_(il) {} + iterator begin() { return iterator(data_.data()); } + const_iterator begin() const { return const_iterator(data_.data()); } + const_iterator cbegin() const { return const_iterator(data_.data()); } + iterator end() { return begin() + size(); } + const_iterator end() const { return begin() + size(); } + size_type size() const { return data_.size(); } + bool empty() const { return data_.empty(); } + + void clear() { data_.clear(); } + + template + iterator insert(const_iterator p, It first, It last) { + return from_vector_iterator(data_.insert(to_vector_iterator(p), first, last)); + } + + iterator insert(const_iterator p, T value) { + return from_vector_iterator(data_.insert(to_vector_iterator(p), std::move(value))); + } + + iterator 
erase(const_iterator first, const_iterator last) { + return from_vector_iterator(data_.erase(to_vector_iterator(first), to_vector_iterator(last))); + } + + iterator erase(const_iterator iter) { return from_vector_iterator(data_.erase(to_vector_iterator(iter))); } + + template + iterator emplace(const_iterator pos, Args&&... args) { + return from_vector_iterator(data_.emplace(to_vector_iterator(pos), std::forward(args)...)); + } + +private: + std::vector::const_iterator to_vector_iterator(const_iterator cit) const { return cit - cbegin() + data_.begin(); } + + iterator from_vector_iterator(std::vector::iterator it) { return it - data_.begin() + begin(); } + + std::vector data_; +}; + +namespace MinSequenceContainer_detail { + +// MinSequenceContainer is non-allocator-aware, because flat_set supports +// such (non-STL) container types, and we want to make sure they are supported. +template +concept HasAllocatorType = requires { typename T::allocator_type; }; +static_assert(!HasAllocatorType>); + +// MinSequenceContainer by itself doesn't support .emplace(), because we want +// to at least somewhat support (non-STL) container types with nothing but .insert(). +template +concept HasEmplace = requires(T& t) { t.emplace(42); }; +static_assert(!HasEmplace>); + +} // namespace MinSequenceContainer_detail + +#endif // SUPPORT_MIN_SEQUENCE_CONTAINER_H diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py index 528eb9995e19f4d..3b12dcb9f56c0b7 100644 --- a/libcxx/utils/libcxx/header_information.py +++ b/libcxx/utils/libcxx/header_information.py @@ -163,7 +163,6 @@ def __hash__(self) -> int: # modules will fail to build if a header is added but this list is not updated. 
headers_not_available = list(map(Header, [ "debugging", - "flat_map", "flat_set", "generator", "hazard_pointer", @@ -251,6 +250,7 @@ def __hash__(self) -> int: "coroutine": ["compare"], "deque": ["compare", "initializer_list"], "filesystem": ["compare"], + "flat_map": ["compare", "initializer_list"], "forward_list": ["compare", "initializer_list"], "ios": ["iosfwd"], "iostream": ["ios", "istream", "ostream", "streambuf"], From 98ca9a635bd2fb98cee473a9558687a5b522e219 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sat, 26 Oct 2024 09:20:26 -0700 Subject: [PATCH 099/425] Reland [StructuralHash] Refactor (#112621) This is largely NFC, and it prepares for #112638. - Use stable_hash instead of uint64_t - Rename update* to hash* functions. They compute stable_hash locally and return it. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608. --- llvm/include/llvm/IR/StructuralHash.h | 7 +- llvm/lib/IR/StructuralHash.cpp | 133 +++++++++++------- llvm/lib/Transforms/IPO/MergeFunctions.cpp | 6 +- .../call-and-invoke-with-ranges-attr.ll | 12 +- .../MergeFunc/call-and-invoke-with-ranges.ll | 8 +- llvm/test/Transforms/MergeFunc/inline-asm.ll | 8 +- 6 files changed, 106 insertions(+), 68 deletions(-) diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 57fb45db8491109..e2e192cc9501b3a 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/StableHashing.h" #include namespace llvm { @@ -21,20 +22,18 @@ namespace llvm { class Function; class Module; -using IRHash = uint64_t; - /// Returns a hash of the function \p F. /// \param F The function to hash. /// \param DetailedHash Whether or not to encode additional information in the /// hash. 
The additional information added into the hash when this flag is set /// to true includes instruction and operand type information. -IRHash StructuralHash(const Function &F, bool DetailedHash = false); +stable_hash StructuralHash(const Function &F, bool DetailedHash = false); /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. \param DetailedHash /// Whether or not to encode additional information in the function hashes that /// composed the module hash. -IRHash StructuralHash(const Module &M, bool DetailedHash = false); +stable_hash StructuralHash(const Module &M, bool DetailedHash = false); } // end namespace llvm diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index fb4f33a021a96bc..267a085c5af7053 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -24,61 +24,93 @@ namespace { // by the MergeFunctions pass. class StructuralHashImpl { - uint64_t Hash = 4; + stable_hash Hash = 4; - void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + bool DetailedHash; + + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. + static constexpr stable_hash BlockHeaderHash = 45798; + static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; + static constexpr stable_hash GlobalHeaderHash = 23456; // This will produce different values on 32-bit and 64-bit systens as // hash_combine returns a size_t. However, this is only used for // detailed hashing which, in-tree, only needs to distinguish between // differences in functions. - template void hashArbitaryType(const T &V) { - hash(hash_combine(V)); + // TODO: This is not stable. 
+ template stable_hash hashArbitaryType(const T &V) { + return hash_combine(V); } - void hashType(Type *ValueType) { - hash(ValueType->getTypeID()); + stable_hash hashType(Type *ValueType) { + SmallVector Hashes; + Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) - hash(ValueType->getIntegerBitWidth()); + Hashes.emplace_back(ValueType->getIntegerBitWidth()); + return stable_hash_combine(Hashes); } public: - StructuralHashImpl() = default; - - void updateOperand(Value *Operand) { - hashType(Operand->getType()); - - // The cases enumerated below are not exhaustive and are only aimed to - // get decent coverage over the function. - if (ConstantInt *ConstInt = dyn_cast(Operand)) { - hashArbitaryType(ConstInt->getValue()); - } else if (ConstantFP *ConstFP = dyn_cast(Operand)) { - hashArbitaryType(ConstFP->getValue()); - } else if (Argument *Arg = dyn_cast(Operand)) { - hash(Arg->getArgNo()); - } else if (Function *Func = dyn_cast(Operand)) { + StructuralHashImpl() = delete; + explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + + stable_hash hashConstant(Constant *C) { + SmallVector Hashes; + // TODO: hashArbitaryType() is not stable. + if (ConstantInt *ConstInt = dyn_cast(C)) { + Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); + } else if (ConstantFP *ConstFP = dyn_cast(C)) { + Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); + } else if (Function *Func = dyn_cast(C)) { // Hashing the name will be deterministic as LLVM's hashing infrastructure // has explicit support for hashing strings and will not simply hash // the pointer. - hashArbitaryType(Func->getName()); + Hashes.emplace_back(hashArbitaryType(Func->getName())); } + + return stable_hash_combine(Hashes); + } + + stable_hash hashValue(Value *V) { + // Check constant and return its hash. + Constant *C = dyn_cast(V); + if (C) + return hashConstant(C); + + // Hash argument number. 
+ SmallVector Hashes; + if (Argument *Arg = dyn_cast(V)) + Hashes.emplace_back(Arg->getArgNo()); + + return stable_hash_combine(Hashes); } - void updateInstruction(const Instruction &Inst, bool DetailedHash) { - hash(Inst.getOpcode()); + stable_hash hashOperand(Value *Operand) { + SmallVector Hashes; + Hashes.emplace_back(hashType(Operand->getType())); + Hashes.emplace_back(hashValue(Operand)); + return stable_hash_combine(Hashes); + } + + stable_hash hashInstruction(const Instruction &Inst) { + SmallVector Hashes; + Hashes.emplace_back(Inst.getOpcode()); if (!DetailedHash) - return; + return stable_hash_combine(Hashes); - hashType(Inst.getType()); + Hashes.emplace_back(hashType(Inst.getType())); // Handle additional properties of specific instructions that cause // semantic differences in the IR. if (const auto *ComparisonInstruction = dyn_cast(&Inst)) - hash(ComparisonInstruction->getPredicate()); + Hashes.emplace_back(ComparisonInstruction->getPredicate()); for (const auto &Op : Inst.operands()) - updateOperand(Op); + Hashes.emplace_back(hashOperand(Op)); + + return stable_hash_combine(Hashes); } // A function hash is calculated by considering only the number of arguments @@ -97,15 +129,17 @@ class StructuralHashImpl { // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. - void update(const Function &F, bool DetailedHash) { + void update(const Function &F) { // Declarations don't affect analyses. 
if (F.isDeclaration()) return; - hash(0x62642d6b6b2d6b72); // Function header + SmallVector Hashes; + Hashes.emplace_back(Hash); + Hashes.emplace_back(FunctionHeaderHash); - hash(F.isVarArg()); - hash(F.arg_size()); + Hashes.emplace_back(F.isVarArg()); + Hashes.emplace_back(F.arg_size()); SmallVector BBs; SmallPtrSet VisitedBBs; @@ -118,17 +152,17 @@ class StructuralHashImpl { while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); - // This random value acts as a block header, as otherwise the partition of - // opcodes into BBs wouldn't affect the hash, only the order of the - // opcodes - hash(45798); + Hashes.emplace_back(BlockHeaderHash); for (auto &Inst : *BB) - updateInstruction(Inst, DetailedHash); + Hashes.emplace_back(hashInstruction(Inst)); for (const BasicBlock *Succ : successors(BB)) if (VisitedBBs.insert(Succ).second) BBs.push_back(Succ); } + + // Update the combined hash in place. + Hash = stable_hash_combine(Hashes); } void update(const GlobalVariable &GV) { @@ -137,15 +171,20 @@ class StructuralHashImpl { // we ignore anything with the `.llvm` prefix if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) return; - hash(23456); // Global header - hash(GV.getValueType()->getTypeID()); + SmallVector Hashes; + Hashes.emplace_back(Hash); + Hashes.emplace_back(GlobalHeaderHash); + Hashes.emplace_back(GV.getValueType()->getTypeID()); + + // Update the combined hash in place. 
+ Hash = stable_hash_combine(Hashes); } - void update(const Module &M, bool DetailedHash) { + void update(const Module &M) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F, DetailedHash); + update(F); } uint64_t getHash() const { return Hash; } @@ -153,14 +192,14 @@ class StructuralHashImpl { } // namespace -IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { - StructuralHashImpl H; - H.update(F, DetailedHash); +stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { + StructuralHashImpl H(DetailedHash); + H.update(F); return H.getHash(); } -IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { - StructuralHashImpl H; - H.update(M, DetailedHash); +stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { + StructuralHashImpl H(DetailedHash); + H.update(M); return H.getHash(); } diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index b50a700e09038f1..ad16b0b3501495e 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -172,14 +172,14 @@ namespace { class FunctionNode { mutable AssertingVH F; - IRHash Hash; + stable_hash Hash; public: // Note the hash is recalculated potentially multiple times, but it is cheap. FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {} Function *getFunc() const { return F; } - IRHash getHash() const { return Hash; } + stable_hash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. @@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) { // All functions in the module, ordered by hash. Functions with a unique // hash value are easily eliminated. 
- std::vector> HashedFuncs; + std::vector> HashedFuncs; for (Function &Func : M) { if (isEligibleForMerging(Func)) { HashedFuncs.push_back({StructuralHash(Func), &Func}); diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll index e5d62319bf9db7b..cbf14165548ec53 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll @@ -80,8 +80,8 @@ lpad: } define i8 @invoke_with_same_range() personality ptr undef { -; CHECK-LABEL: @invoke_with_same_range() -; CHECK: tail call i8 @invoke_with_range() +; CHECK-DAG: @invoke_with_same_range() +; CHECK-DAG: tail call i8 @invoke_with_range() %out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad next: @@ -93,15 +93,15 @@ lpad: } define i8 @call_with_same_range() { -; CHECK-LABEL: @call_with_same_range -; CHECK: tail call i8 @call_with_range +; CHECK-DAG: @call_with_same_range +; CHECK-DAG: tail call i8 @call_with_range %out = call range(i8 0, 2) i8 @dummy() ret i8 %out } define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) { -; CHECK-LABEL: @call_with_same_range_attr -; CHECK: tail call i8 @call_with_range_attr +; CHECK-DAG: @call_with_same_range_attr +; CHECK-DAG: tail call i8 @call_with_range_attr %out = call i8 @dummy2(i8 %v) ret i8 %out } diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll index e7718ca84d31657..39e5a11181a4f03 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll @@ -64,8 +64,8 @@ lpad: } define i8 @invoke_with_same_range() personality ptr undef { -; CHECK-LABEL: @invoke_with_same_range() -; CHECK: tail call i8 @invoke_with_range() +; CHECK-DAG: @invoke_with_same_range() +; CHECK-DAG: tail call i8 @invoke_with_range() %out = 
invoke i8 @dummy() to label %next unwind label %lpad, !range !0 next: @@ -77,8 +77,8 @@ lpad: } define i8 @call_with_same_range() { -; CHECK-LABEL: @call_with_same_range -; CHECK: tail call i8 @call_with_range +; CHECK-DAG: @call_with_same_range +; CHECK-DAG: tail call i8 @call_with_range bitcast i8 0 to i8 %out = call i8 @dummy(), !range !0 ret i8 %out diff --git a/llvm/test/Transforms/MergeFunc/inline-asm.ll b/llvm/test/Transforms/MergeFunc/inline-asm.ll index 7cc6afd2f8f7bdc..970757e8d53afbb 100644 --- a/llvm/test/Transforms/MergeFunc/inline-asm.ll +++ b/llvm/test/Transforms/MergeFunc/inline-asm.ll @@ -3,11 +3,11 @@ ; CHECK-LABEL: @int_ptr_arg_different ; CHECK-NEXT: call void asm -; CHECK-LABEL: @int_ptr_null -; CHECK-NEXT: tail call void @float_ptr_null() +; CHECK-DAG: @int_ptr_null +; CHECK-DAG: tail call void @float_ptr_null() -; CHECK-LABEL: @int_ptr_arg_same -; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0) +; CHECK-DAG: @int_ptr_arg_same +; CHECK-DAG: tail call void @float_ptr_arg_same(ptr %0) ; Used to satisfy minimum size limit declare void @stuff() From d78d030b31bb3bde822115fd9d26ab887f890203 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 26 Oct 2024 12:26:11 -0700 Subject: [PATCH 100/425] [MC] Export llvm::XCOFFObjectwriter and access it from MCXCOFFStreamer Similar to commit 9539a7796094ff5fb59d9c685140ea2e214b945c for WinCOFFObjectWriter. 
Close #100412 --- llvm/include/llvm/MC/MCXCOFFObjectWriter.h | 17 ++- llvm/include/llvm/MC/MCXCOFFStreamer.h | 3 + llvm/lib/MC/MCXCOFFStreamer.cpp | 12 +- llvm/lib/MC/XCOFFObjectWriter.cpp | 149 +++++++++------------ 4 files changed, 83 insertions(+), 98 deletions(-) diff --git a/llvm/include/llvm/MC/MCXCOFFObjectWriter.h b/llvm/include/llvm/MC/MCXCOFFObjectWriter.h index c0e32a70172d882..968d938a6549815 100644 --- a/llvm/include/llvm/MC/MCXCOFFObjectWriter.h +++ b/llvm/include/llvm/MC/MCXCOFFObjectWriter.h @@ -39,19 +39,18 @@ class MCXCOFFObjectTargetWriter : public MCObjectTargetWriter { bool Is64Bit; }; +class XCOFFObjectWriter : public MCObjectWriter { +public: + virtual void addExceptionEntry(const MCSymbol *Symbol, const MCSymbol *Trap, + unsigned LanguageCode, unsigned ReasonCode, + unsigned FunctionSize, bool hasDebug) = 0; + virtual void addCInfoSymEntry(StringRef Name, StringRef Metadata) = 0; +}; + std::unique_ptr createXCOFFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS); -namespace XCOFF { -void addExceptionEntry(MCObjectWriter &Writer, const MCSymbol *Symbol, - const MCSymbol *Trap, unsigned LanguageCode, - unsigned ReasonCode, unsigned FunctionSize, - bool hasDebug); -void addCInfoSymEntry(MCObjectWriter &Writer, StringRef Name, - StringRef Metadata); -} // namespace XCOFF - } // end namespace llvm #endif // LLVM_MC_MCXCOFFOBJECTWRITER_H diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h index 8cae64fa33be050..3c7abfea85ecf5a 100644 --- a/llvm/include/llvm/MC/MCXCOFFStreamer.h +++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h @@ -12,6 +12,7 @@ #include "llvm/MC/MCObjectStreamer.h" namespace llvm { +class XCOFFObjectWriter; class MCXCOFFStreamer : public MCObjectStreamer { public: @@ -19,6 +20,8 @@ class MCXCOFFStreamer : public MCObjectStreamer { std::unique_ptr OW, std::unique_ptr Emitter); + XCOFFObjectWriter &getWriter(); + bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; 
void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, Align ByteAlignment) override; diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 94aa1ebc8f9e116..f5b83f29352ca7f 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -32,6 +32,10 @@ MCXCOFFStreamer::MCXCOFFStreamer(MCContext &Context, : MCObjectStreamer(Context, std::move(MAB), std::move(OW), std::move(Emitter)) {} +XCOFFObjectWriter &MCXCOFFStreamer::getWriter() { + return static_cast(getAssembler().getWriter()); +} + bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym, MCSymbolAttr Attribute) { auto *Symbol = cast(Sym); @@ -109,14 +113,12 @@ void MCXCOFFStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol, unsigned Lang, unsigned Reason, unsigned FunctionSize, bool hasDebug) { - // TODO: Export XCOFFObjectWriter to llvm/MC/MCXCOFFObjectWriter.h and access - // it from MCXCOFFStreamer. - XCOFF::addExceptionEntry(getAssembler().getWriter(), Symbol, Trap, Lang, - Reason, FunctionSize, hasDebug); + getWriter().addExceptionEntry(Symbol, Trap, Lang, Reason, FunctionSize, + hasDebug); } void MCXCOFFStreamer::emitXCOFFCInfoSym(StringRef Name, StringRef Metadata) { - XCOFF::addCInfoSymEntry(getAssembler().getWriter(), Name, Metadata); + getWriter().addCInfoSymEntry(Name, Metadata); } void MCXCOFFStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index c7f29c73eaac093..5d8f3dbdaadad54 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -290,8 +290,7 @@ struct CInfoSymSectionEntry : public SectionEntry { } }; -class XCOFFObjectWriter : public MCObjectWriter { - +class XCOFFWriter final : public XCOFFObjectWriter { uint32_t SymbolTableEntryCount = 0; uint64_t SymbolTableOffset = 0; uint16_t SectionCount = 0; @@ -433,8 +432,8 @@ class XCOFFObjectWriter : public MCObjectWriter { } public: - 
XCOFFObjectWriter(std::unique_ptr MOTW, - raw_pwrite_stream &OS); + XCOFFWriter(std::unique_ptr MOTW, + raw_pwrite_stream &OS); void writeWord(uint64_t Word) { is64Bit() ? W.write(Word) : W.write(Word); @@ -442,12 +441,12 @@ class XCOFFObjectWriter : public MCObjectWriter { void addExceptionEntry(const MCSymbol *Symbol, const MCSymbol *Trap, unsigned LanguageCode, unsigned ReasonCode, - unsigned FunctionSize, bool hasDebug); - void addCInfoSymEntry(StringRef Name, StringRef Metadata); + unsigned FunctionSize, bool hasDebug) override; + void addCInfoSymEntry(StringRef Name, StringRef Metadata) override; }; -XCOFFObjectWriter::XCOFFObjectWriter( - std::unique_ptr MOTW, raw_pwrite_stream &OS) +XCOFFWriter::XCOFFWriter(std::unique_ptr MOTW, + raw_pwrite_stream &OS) : W(OS, llvm::endianness::big), TargetObjectWriter(std::move(MOTW)), Strings(StringTableBuilder::XCOFF), Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false, @@ -463,7 +462,7 @@ XCOFFObjectWriter::XCOFFObjectWriter( ExceptionSection(".except", XCOFF::STYP_EXCEPT), CInfoSymSection(".info", XCOFF::STYP_INFO) {} -void XCOFFObjectWriter::reset() { +void XCOFFWriter::reset() { // Clear the mappings we created. SymbolIndexMap.clear(); SectionMap.clear(); @@ -479,7 +478,7 @@ void XCOFFObjectWriter::reset() { ExceptionSection.reset(); CInfoSymSection.reset(); - // Reset states in XCOFFObjectWriter. + // Reset states in XCOFFWriter. 
SymbolTableEntryCount = 0; SymbolTableOffset = 0; SectionCount = 0; @@ -489,7 +488,7 @@ void XCOFFObjectWriter::reset() { MCObjectWriter::reset(); } -CsectGroup &XCOFFObjectWriter::getCsectGroup(const MCSectionXCOFF *MCSec) { +CsectGroup &XCOFFWriter::getCsectGroup(const MCSectionXCOFF *MCSec) { switch (MCSec->getMappingClass()) { case XCOFF::XMC_PR: assert(XCOFF::XTY_SD == MCSec->getCSectType() && @@ -556,7 +555,7 @@ static MCSectionXCOFF *getContainingCsect(const MCSymbolXCOFF *XSym) { return XSym->getRepresentedCsect(); } -void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm) { +void XCOFFWriter::executePostLayoutBinding(MCAssembler &Asm) { for (const auto &S : Asm) { const auto *MCSec = cast(&S); assert(!SectionMap.contains(MCSec) && "Cannot add a section twice."); @@ -657,10 +656,9 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm) { assignAddressesAndIndices(Asm); } -void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm, - const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { +void XCOFFWriter::recordRelocation(MCAssembler &Asm, const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { auto getIndex = [this](const MCSymbol *Sym, const MCSectionXCOFF *ContainingCsect) { // If we could not find the symbol directly in SymbolIndexMap, this symbol @@ -812,7 +810,7 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm, FixedValue -= getVirtualAddress(SymB, SymBSec); } -void XCOFFObjectWriter::writeSections(const MCAssembler &Asm) { +void XCOFFWriter::writeSections(const MCAssembler &Asm) { uint64_t CurrentAddressLocation = 0; for (const auto *Section : Sections) writeSectionForControlSectionEntry(Asm, *Section, CurrentAddressLocation); @@ -824,7 +822,7 @@ void XCOFFObjectWriter::writeSections(const MCAssembler &Asm) { CurrentAddressLocation); } -uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm) { +uint64_t 
XCOFFWriter::writeObject(MCAssembler &Asm) { // We always emit a timestamp of 0 for reproducibility, so ensure incremental // linking is not enabled, in case, like with Windows COFF, such a timestamp // is incompatible with incremental linking of XCOFF. @@ -844,11 +842,11 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm) { return W.OS.tell() - StartOffset; } -bool XCOFFObjectWriter::nameShouldBeInStringTable(const StringRef &SymbolName) { +bool XCOFFWriter::nameShouldBeInStringTable(const StringRef &SymbolName) { return SymbolName.size() > XCOFF::NameSize || is64Bit(); } -void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) { +void XCOFFWriter::writeSymbolName(const StringRef &SymbolName) { // Magic, Offset or SymbolName. if (nameShouldBeInStringTable(SymbolName)) { W.write(0); @@ -861,11 +859,10 @@ void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) { } } -void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value, - int16_t SectionNumber, - uint16_t SymbolType, - uint8_t StorageClass, - uint8_t NumberOfAuxEntries) { +void XCOFFWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value, + int16_t SectionNumber, uint16_t SymbolType, + uint8_t StorageClass, + uint8_t NumberOfAuxEntries) { if (is64Bit()) { W.write(Value); W.write(Strings.getOffset(SymbolName)); @@ -879,9 +876,9 @@ void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value, W.write(NumberOfAuxEntries); } -void XCOFFObjectWriter::writeSymbolAuxCsectEntry(uint64_t SectionOrLength, - uint8_t SymbolAlignmentAndType, - uint8_t StorageMappingClass) { +void XCOFFWriter::writeSymbolAuxCsectEntry(uint64_t SectionOrLength, + uint8_t SymbolAlignmentAndType, + uint8_t StorageMappingClass) { W.write(is64Bit() ? 
Lo_32(SectionOrLength) : SectionOrLength); W.write(0); // ParameterHashIndex W.write(0); // TypeChkSectNum @@ -897,12 +894,12 @@ void XCOFFObjectWriter::writeSymbolAuxCsectEntry(uint64_t SectionOrLength, } } -bool XCOFFObjectWriter::auxFileSymNameShouldBeInStringTable( +bool XCOFFWriter::auxFileSymNameShouldBeInStringTable( const StringRef &SymbolName) { return SymbolName.size() > XCOFF::AuxFileEntNameSize; } -void XCOFFObjectWriter::writeAuxFileSymName(const StringRef &SymbolName) { +void XCOFFWriter::writeAuxFileSymName(const StringRef &SymbolName) { // Magic, Offset or SymbolName. if (auxFileSymNameShouldBeInStringTable(SymbolName)) { W.write(0); @@ -916,8 +913,7 @@ void XCOFFObjectWriter::writeAuxFileSymName(const StringRef &SymbolName) { } } -void XCOFFObjectWriter::writeSymbolAuxFileEntry(StringRef &Name, - uint8_t ftype) { +void XCOFFWriter::writeSymbolAuxFileEntry(StringRef &Name, uint8_t ftype) { writeAuxFileSymName(Name); W.write(ftype); W.OS.write_zeros(2); @@ -927,8 +923,8 @@ void XCOFFObjectWriter::writeSymbolAuxFileEntry(StringRef &Name, W.OS.write_zeros(1); } -void XCOFFObjectWriter::writeSymbolAuxDwarfEntry( - uint64_t LengthOfSectionPortion, uint64_t NumberOfRelocEnt) { +void XCOFFWriter::writeSymbolAuxDwarfEntry(uint64_t LengthOfSectionPortion, + uint64_t NumberOfRelocEnt) { writeWord(LengthOfSectionPortion); if (!is64Bit()) W.OS.write_zeros(4); // Reserved @@ -941,7 +937,7 @@ void XCOFFObjectWriter::writeSymbolAuxDwarfEntry( } } -void XCOFFObjectWriter::writeSymbolEntryForCsectMemberLabel( +void XCOFFWriter::writeSymbolEntryForCsectMemberLabel( const Symbol &SymbolRef, const XCOFFSection &CSectionRef, int16_t SectionIndex, uint64_t SymbolOffset) { assert(SymbolOffset <= MaxRawDataSize - CSectionRef.Address && @@ -986,7 +982,7 @@ void XCOFFObjectWriter::writeSymbolEntryForCsectMemberLabel( CSectionRef.MCSec->getMappingClass()); } -void XCOFFObjectWriter::writeSymbolEntryForDwarfSection( +void XCOFFWriter::writeSymbolEntryForDwarfSection( const 
XCOFFSection &DwarfSectionRef, int16_t SectionIndex) { assert(DwarfSectionRef.MCSec->isDwarfSect() && "Not a DWARF section!"); @@ -996,7 +992,7 @@ void XCOFFObjectWriter::writeSymbolEntryForDwarfSection( writeSymbolAuxDwarfEntry(DwarfSectionRef.Size); } -void XCOFFObjectWriter::writeSymbolEntryForControlSection( +void XCOFFWriter::writeSymbolEntryForControlSection( const XCOFFSection &CSectionRef, int16_t SectionIndex, XCOFF::StorageClass StorageClass) { writeSymbolEntry(CSectionRef.getSymbolTableName(), CSectionRef.Address, @@ -1006,10 +1002,10 @@ void XCOFFObjectWriter::writeSymbolEntryForControlSection( CSectionRef.MCSec->getMappingClass()); } -void XCOFFObjectWriter::writeSymbolAuxFunctionEntry(uint32_t EntryOffset, - uint32_t FunctionSize, - uint64_t LineNumberPointer, - uint32_t EndIndex) { +void XCOFFWriter::writeSymbolAuxFunctionEntry(uint32_t EntryOffset, + uint32_t FunctionSize, + uint64_t LineNumberPointer, + uint32_t EndIndex) { if (is64Bit()) writeWord(LineNumberPointer); else @@ -1026,9 +1022,9 @@ void XCOFFObjectWriter::writeSymbolAuxFunctionEntry(uint32_t EntryOffset, } } -void XCOFFObjectWriter::writeSymbolAuxExceptionEntry(uint64_t EntryOffset, - uint32_t FunctionSize, - uint32_t EndIndex) { +void XCOFFWriter::writeSymbolAuxExceptionEntry(uint64_t EntryOffset, + uint32_t FunctionSize, + uint32_t EndIndex) { assert(is64Bit() && "Exception auxilliary entries are 64-bit only."); W.write(EntryOffset); W.write(FunctionSize); @@ -1037,7 +1033,7 @@ void XCOFFObjectWriter::writeSymbolAuxExceptionEntry(uint64_t EntryOffset, W.write(XCOFF::AUX_EXCEPT); } -void XCOFFObjectWriter::writeFileHeader() { +void XCOFFWriter::writeFileHeader() { W.write(is64Bit() ? 
XCOFF::XCOFF64 : XCOFF::XCOFF32); W.write(SectionCount); W.write(0); // TimeStamp @@ -1053,7 +1049,7 @@ void XCOFFObjectWriter::writeFileHeader() { } } -void XCOFFObjectWriter::writeAuxFileHeader() { +void XCOFFWriter::writeAuxFileHeader() { if (!auxiliaryHeaderSize()) return; W.write(0); // Magic @@ -1069,7 +1065,7 @@ void XCOFFObjectWriter::writeAuxFileHeader() { W.write(Sections[1]->Address); // DataStartAddr } -void XCOFFObjectWriter::writeSectionHeader(const SectionEntry *Sec) { +void XCOFFWriter::writeSectionHeader(const SectionEntry *Sec) { bool IsDwarf = (Sec->Flags & XCOFF::STYP_DWARF) != 0; bool IsOvrflo = (Sec->Flags & XCOFF::STYP_OVRFLO) != 0; // Nothing to write for this Section. @@ -1109,7 +1105,7 @@ void XCOFFObjectWriter::writeSectionHeader(const SectionEntry *Sec) { } } -void XCOFFObjectWriter::writeSectionHeaderTable() { +void XCOFFWriter::writeSectionHeaderTable() { for (const auto *CsectSec : Sections) writeSectionHeader(CsectSec); for (const auto &DwarfSec : DwarfSections) @@ -1122,8 +1118,8 @@ void XCOFFObjectWriter::writeSectionHeaderTable() { writeSectionHeader(&CInfoSymSection); } -void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc, - const XCOFFSection &Section) { +void XCOFFWriter::writeRelocation(XCOFFRelocation Reloc, + const XCOFFSection &Section) { if (Section.MCSec->isCsect()) writeWord(Section.Address + Reloc.FixupOffsetInCsect); else { @@ -1136,7 +1132,7 @@ void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc, W.write(Reloc.Type); } -void XCOFFObjectWriter::writeRelocations() { +void XCOFFWriter::writeRelocations() { for (const auto *Section : Sections) { if (Section->Index == SectionEntry::UninitializedIndex) // Nothing to write for this Section. @@ -1158,7 +1154,7 @@ void XCOFFObjectWriter::writeRelocations() { writeRelocation(Reloc, *DwarfSection.DwarfSect); } -void XCOFFObjectWriter::writeSymbolTable(MCAssembler &Asm) { +void XCOFFWriter::writeSymbolTable(MCAssembler &Asm) { // Write C_FILE symbols. 
StringRef Vers = CompilerVersion; @@ -1239,8 +1235,7 @@ void XCOFFObjectWriter::writeSymbolTable(MCAssembler &Asm) { DwarfSection.Index); } -void XCOFFObjectWriter::finalizeRelocationInfo(SectionEntry *Sec, - uint64_t RelCount) { +void XCOFFWriter::finalizeRelocationInfo(SectionEntry *Sec, uint64_t RelCount) { // Handles relocation field overflows in an XCOFF32 file. An XCOFF64 file // may not contain an overflow section header. if (!is64Bit() && (RelCount >= static_cast(XCOFF::RelocOverflow))) { @@ -1265,8 +1260,8 @@ void XCOFFObjectWriter::finalizeRelocationInfo(SectionEntry *Sec, } } -void XCOFFObjectWriter::calcOffsetToRelocations(SectionEntry *Sec, - uint64_t &RawPointer) { +void XCOFFWriter::calcOffsetToRelocations(SectionEntry *Sec, + uint64_t &RawPointer) { if (!Sec->RelocationCount) return; @@ -1297,7 +1292,7 @@ void XCOFFObjectWriter::calcOffsetToRelocations(SectionEntry *Sec, report_fatal_error("Relocation data overflowed this object file."); } -void XCOFFObjectWriter::finalizeSectionInfo() { +void XCOFFWriter::finalizeSectionInfo() { for (auto *Section : Sections) { if (Section->Index == SectionEntry::UninitializedIndex) // Nothing to record for this Section. @@ -1361,9 +1356,10 @@ void XCOFFObjectWriter::finalizeSectionInfo() { SymbolTableOffset = RawPointer; } -void XCOFFObjectWriter::addExceptionEntry( - const MCSymbol *Symbol, const MCSymbol *Trap, unsigned LanguageCode, - unsigned ReasonCode, unsigned FunctionSize, bool hasDebug) { +void XCOFFWriter::addExceptionEntry(const MCSymbol *Symbol, + const MCSymbol *Trap, unsigned LanguageCode, + unsigned ReasonCode, unsigned FunctionSize, + bool hasDebug) { // If a module had debug info, debugging is enabled and XCOFF emits the // exception auxilliary entry. 
if (hasDebug) @@ -1383,7 +1379,7 @@ void XCOFFObjectWriter::addExceptionEntry( std::pair(Symbol->getName(), NewEntry)); } -unsigned XCOFFObjectWriter::getExceptionSectionSize() { +unsigned XCOFFWriter::getExceptionSectionSize() { unsigned EntryNum = 0; for (const auto &TableEntry : ExceptionSection.ExceptionTable) @@ -1395,7 +1391,7 @@ unsigned XCOFFObjectWriter::getExceptionSectionSize() { : XCOFF::ExceptionSectionEntrySize32); } -unsigned XCOFFObjectWriter::getExceptionOffset(const MCSymbol *Symbol) { +unsigned XCOFFWriter::getExceptionOffset(const MCSymbol *Symbol) { unsigned EntryNum = 0; for (const auto &TableEntry : ExceptionSection.ExceptionTable) { if (Symbol == TableEntry.second.FunctionSymbol) @@ -1406,13 +1402,13 @@ unsigned XCOFFObjectWriter::getExceptionOffset(const MCSymbol *Symbol) { : XCOFF::ExceptionSectionEntrySize32); } -void XCOFFObjectWriter::addCInfoSymEntry(StringRef Name, StringRef Metadata) { +void XCOFFWriter::addCInfoSymEntry(StringRef Name, StringRef Metadata) { assert(!CInfoSymSection.Entry && "Multiple entries are not supported"); CInfoSymSection.addEntry( std::make_unique(Name.str(), Metadata.str())); } -void XCOFFObjectWriter::assignAddressesAndIndices(MCAssembler &Asm) { +void XCOFFWriter::assignAddressesAndIndices(MCAssembler &Asm) { // The symbol table starts with all the C_FILE symbols. Each C_FILE symbol // requires 1 or 2 auxiliary entries. uint32_t SymbolTableIndex = @@ -1588,7 +1584,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(MCAssembler &Asm) { SymbolTableEntryCount = SymbolTableIndex; } -void XCOFFObjectWriter::writeSectionForControlSectionEntry( +void XCOFFWriter::writeSectionForControlSectionEntry( const MCAssembler &Asm, const CsectSectionEntry &CsectEntry, uint64_t &CurrentAddressLocation) { // Nothing to write for this Section. 
@@ -1635,7 +1631,7 @@ void XCOFFObjectWriter::writeSectionForControlSectionEntry( } } -void XCOFFObjectWriter::writeSectionForDwarfSectionEntry( +void XCOFFWriter::writeSectionForDwarfSectionEntry( const MCAssembler &Asm, const DwarfSectionEntry &DwarfEntry, uint64_t &CurrentAddressLocation) { // There could be a gap (without corresponding zero padding) between @@ -1663,7 +1659,7 @@ void XCOFFObjectWriter::writeSectionForDwarfSectionEntry( CurrentAddressLocation += TailPaddingSize; } -void XCOFFObjectWriter::writeSectionForExceptionSectionEntry( +void XCOFFWriter::writeSectionForExceptionSectionEntry( const MCAssembler &Asm, ExceptionSectionEntry &ExceptionEntry, uint64_t &CurrentAddressLocation) { for (const auto &TableEntry : ExceptionEntry.ExceptionTable) { @@ -1685,7 +1681,7 @@ void XCOFFObjectWriter::writeSectionForExceptionSectionEntry( CurrentAddressLocation += getExceptionSectionSize(); } -void XCOFFObjectWriter::writeSectionForCInfoSymSectionEntry( +void XCOFFWriter::writeSectionForCInfoSymSectionEntry( const MCAssembler &Asm, CInfoSymSectionEntry &CInfoSymEntry, uint64_t &CurrentAddressLocation) { if (!CInfoSymSection.Entry) @@ -1737,20 +1733,5 @@ uint8_t getEncodedType(const MCSectionXCOFF *Sec) { std::unique_ptr llvm::createXCOFFObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) { - return std::make_unique(std::move(MOTW), OS); -} - -// TODO: Export XCOFFObjectWriter to llvm/MC/MCXCOFFObjectWriter.h and remove -// the forwarders. 
-void XCOFF::addExceptionEntry(MCObjectWriter &Writer, const MCSymbol *Symbol, - const MCSymbol *Trap, unsigned LanguageCode, - unsigned ReasonCode, unsigned FunctionSize, - bool hasDebug) { - static_cast(Writer).addExceptionEntry( - Symbol, Trap, LanguageCode, ReasonCode, FunctionSize, hasDebug); -} - -void XCOFF::addCInfoSymEntry(MCObjectWriter &Writer, StringRef Name, - StringRef Metadata) { - static_cast(Writer).addCInfoSymEntry(Name, Metadata); + return std::make_unique(std::move(MOTW), OS); } From 64d7e45c40e75846d13f4f7a853f9a3ea3493faa Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Sat, 26 Oct 2024 21:38:54 +0200 Subject: [PATCH 101/425] Revert "[flang][debug] Support mlir::NoneType." (#113769) Reverts llvm/llvm-project#113550 It turns out this causes compiler crashes with assumed-type arrays and -g. See https://github.com/llvm/llvm-project/pull/113769 for a reproducer. --- .../Optimizer/Transforms/DebugTypeGenerator.cpp | 2 -- flang/test/Transforms/debug-none-type.fir | 14 -------------- 2 files changed, 16 deletions(-) delete mode 100644 flang/test/Transforms/debug-none-type.fir diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index e387e27533a0060..1ab6c76dae8eda7 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -581,8 +581,6 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, /*genAssociated=*/false); } else if (auto vecTy = mlir::dyn_cast_or_null(Ty)) { return convertVectorType(vecTy, fileAttr, scope, declOp); - } else if (mlir::isa(Ty)) { - return mlir::LLVM::DINullTypeAttr::get(context); } else if (auto boxTy = mlir::dyn_cast_or_null(Ty)) { auto elTy = boxTy.getElementType(); if (auto seqTy = mlir::dyn_cast_or_null(elTy)) diff --git a/flang/test/Transforms/debug-none-type.fir b/flang/test/Transforms/debug-none-type.fir deleted file mode 100644 index 
5eee6a63321904f..000000000000000 --- a/flang/test/Transforms/debug-none-type.fir +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s - -module attributes {dlti.dl_spec = #dlti.dl_spec<>} { - func.func private @_FortranAAssign(i8) -> none loc(#loc1) - func.func private @foo() -> !fir.ref loc(#loc2) -} -#loc1 = loc("test.f90":5:1) -#loc2 = loc("test.f90":15:1) - -// CHECK: #[[INT8_TY:.*]] = #llvm.di_basic_type -// CHECK: #[[NONE_TY:.*]] = #llvm.di_null_type -// CHECK: #[[REFNONE_TY:.*]] = #llvm.di_derived_type -// CHECK: #llvm.di_subroutine_type<{{.*}}types = #[[NONE_TY]], #[[INT8_TY]]> -// CHECK: #llvm.di_subroutine_type<{{.*}}types = #[[REFNONE_TY]]> From d104b8e827ef5c3cb723aee92af4adfc8af18e9a Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sat, 26 Oct 2024 13:55:46 -0700 Subject: [PATCH 102/425] Revert "Reland [StructuralHash] Refactor (#112621)" This reverts commit 98ca9a635bd2fb98cee473a9558687a5b522e219. --- llvm/include/llvm/IR/StructuralHash.h | 7 +- llvm/lib/IR/StructuralHash.cpp | 133 +++++++----------- llvm/lib/Transforms/IPO/MergeFunctions.cpp | 6 +- .../call-and-invoke-with-ranges-attr.ll | 12 +- .../MergeFunc/call-and-invoke-with-ranges.ll | 8 +- llvm/test/Transforms/MergeFunc/inline-asm.ll | 8 +- 6 files changed, 68 insertions(+), 106 deletions(-) diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index e2e192cc9501b3a..57fb45db8491109 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,7 +14,6 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H -#include "llvm/ADT/StableHashing.h" #include namespace llvm { @@ -22,18 +21,20 @@ namespace llvm { class Function; class Module; +using IRHash = uint64_t; + /// Returns a hash of the function \p F. /// \param F The function to hash. /// \param DetailedHash Whether or not to encode additional information in the /// hash. 
The additional information added into the hash when this flag is set /// to true includes instruction and operand type information. -stable_hash StructuralHash(const Function &F, bool DetailedHash = false); +IRHash StructuralHash(const Function &F, bool DetailedHash = false); /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. \param DetailedHash /// Whether or not to encode additional information in the function hashes that /// composed the module hash. -stable_hash StructuralHash(const Module &M, bool DetailedHash = false); +IRHash StructuralHash(const Module &M, bool DetailedHash = false); } // end namespace llvm diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 267a085c5af7053..fb4f33a021a96bc 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -24,93 +24,61 @@ namespace { // by the MergeFunctions pass. class StructuralHashImpl { - stable_hash Hash = 4; + uint64_t Hash = 4; - bool DetailedHash; - - // This random value acts as a block header, as otherwise the partition of - // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. - static constexpr stable_hash BlockHeaderHash = 45798; - static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; - static constexpr stable_hash GlobalHeaderHash = 23456; + void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } // This will produce different values on 32-bit and 64-bit systens as // hash_combine returns a size_t. However, this is only used for // detailed hashing which, in-tree, only needs to distinguish between // differences in functions. - // TODO: This is not stable. 
- template stable_hash hashArbitaryType(const T &V) { - return hash_combine(V); + template void hashArbitaryType(const T &V) { + hash(hash_combine(V)); } - stable_hash hashType(Type *ValueType) { - SmallVector Hashes; - Hashes.emplace_back(ValueType->getTypeID()); + void hashType(Type *ValueType) { + hash(ValueType->getTypeID()); if (ValueType->isIntegerTy()) - Hashes.emplace_back(ValueType->getIntegerBitWidth()); - return stable_hash_combine(Hashes); + hash(ValueType->getIntegerBitWidth()); } public: - StructuralHashImpl() = delete; - explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} - - stable_hash hashConstant(Constant *C) { - SmallVector Hashes; - // TODO: hashArbitaryType() is not stable. - if (ConstantInt *ConstInt = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); - } else if (ConstantFP *ConstFP = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); - } else if (Function *Func = dyn_cast(C)) { + StructuralHashImpl() = default; + + void updateOperand(Value *Operand) { + hashType(Operand->getType()); + + // The cases enumerated below are not exhaustive and are only aimed to + // get decent coverage over the function. + if (ConstantInt *ConstInt = dyn_cast(Operand)) { + hashArbitaryType(ConstInt->getValue()); + } else if (ConstantFP *ConstFP = dyn_cast(Operand)) { + hashArbitaryType(ConstFP->getValue()); + } else if (Argument *Arg = dyn_cast(Operand)) { + hash(Arg->getArgNo()); + } else if (Function *Func = dyn_cast(Operand)) { // Hashing the name will be deterministic as LLVM's hashing infrastructure // has explicit support for hashing strings and will not simply hash // the pointer. - Hashes.emplace_back(hashArbitaryType(Func->getName())); + hashArbitaryType(Func->getName()); } - - return stable_hash_combine(Hashes); - } - - stable_hash hashValue(Value *V) { - // Check constant and return its hash. 
- Constant *C = dyn_cast(V); - if (C) - return hashConstant(C); - - // Hash argument number. - SmallVector Hashes; - if (Argument *Arg = dyn_cast(V)) - Hashes.emplace_back(Arg->getArgNo()); - - return stable_hash_combine(Hashes); } - stable_hash hashOperand(Value *Operand) { - SmallVector Hashes; - Hashes.emplace_back(hashType(Operand->getType())); - Hashes.emplace_back(hashValue(Operand)); - return stable_hash_combine(Hashes); - } - - stable_hash hashInstruction(const Instruction &Inst) { - SmallVector Hashes; - Hashes.emplace_back(Inst.getOpcode()); + void updateInstruction(const Instruction &Inst, bool DetailedHash) { + hash(Inst.getOpcode()); if (!DetailedHash) - return stable_hash_combine(Hashes); + return; - Hashes.emplace_back(hashType(Inst.getType())); + hashType(Inst.getType()); // Handle additional properties of specific instructions that cause // semantic differences in the IR. if (const auto *ComparisonInstruction = dyn_cast(&Inst)) - Hashes.emplace_back(ComparisonInstruction->getPredicate()); + hash(ComparisonInstruction->getPredicate()); for (const auto &Op : Inst.operands()) - Hashes.emplace_back(hashOperand(Op)); - - return stable_hash_combine(Hashes); + updateOperand(Op); } // A function hash is calculated by considering only the number of arguments @@ -129,17 +97,15 @@ class StructuralHashImpl { // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. - void update(const Function &F) { + void update(const Function &F, bool DetailedHash) { // Declarations don't affect analyses. 
if (F.isDeclaration()) return; - SmallVector Hashes; - Hashes.emplace_back(Hash); - Hashes.emplace_back(FunctionHeaderHash); + hash(0x62642d6b6b2d6b72); // Function header - Hashes.emplace_back(F.isVarArg()); - Hashes.emplace_back(F.arg_size()); + hash(F.isVarArg()); + hash(F.arg_size()); SmallVector BBs; SmallPtrSet VisitedBBs; @@ -152,17 +118,17 @@ class StructuralHashImpl { while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); - Hashes.emplace_back(BlockHeaderHash); + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the + // opcodes + hash(45798); for (auto &Inst : *BB) - Hashes.emplace_back(hashInstruction(Inst)); + updateInstruction(Inst, DetailedHash); for (const BasicBlock *Succ : successors(BB)) if (VisitedBBs.insert(Succ).second) BBs.push_back(Succ); } - - // Update the combined hash in place. - Hash = stable_hash_combine(Hashes); } void update(const GlobalVariable &GV) { @@ -171,20 +137,15 @@ class StructuralHashImpl { // we ignore anything with the `.llvm` prefix if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) return; - SmallVector Hashes; - Hashes.emplace_back(Hash); - Hashes.emplace_back(GlobalHeaderHash); - Hashes.emplace_back(GV.getValueType()->getTypeID()); - - // Update the combined hash in place. 
- Hash = stable_hash_combine(Hashes); + hash(23456); // Global header + hash(GV.getValueType()->getTypeID()); } - void update(const Module &M) { + void update(const Module &M, bool DetailedHash) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F); + update(F, DetailedHash); } uint64_t getHash() const { return Hash; } @@ -192,14 +153,14 @@ class StructuralHashImpl { } // namespace -stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { - StructuralHashImpl H(DetailedHash); - H.update(F); +IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { + StructuralHashImpl H; + H.update(F, DetailedHash); return H.getHash(); } -stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { - StructuralHashImpl H(DetailedHash); - H.update(M); +IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { + StructuralHashImpl H; + H.update(M, DetailedHash); return H.getHash(); } diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index ad16b0b3501495e..b50a700e09038f1 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -172,14 +172,14 @@ namespace { class FunctionNode { mutable AssertingVH F; - stable_hash Hash; + IRHash Hash; public: // Note the hash is recalculated potentially multiple times, but it is cheap. FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {} Function *getFunc() const { return F; } - stable_hash getHash() const { return Hash; } + IRHash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. @@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) { // All functions in the module, ordered by hash. Functions with a unique // hash value are easily eliminated. 
- std::vector> HashedFuncs; + std::vector> HashedFuncs; for (Function &Func : M) { if (isEligibleForMerging(Func)) { HashedFuncs.push_back({StructuralHash(Func), &Func}); diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll index cbf14165548ec53..e5d62319bf9db7b 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll @@ -80,8 +80,8 @@ lpad: } define i8 @invoke_with_same_range() personality ptr undef { -; CHECK-DAG: @invoke_with_same_range() -; CHECK-DAG: tail call i8 @invoke_with_range() +; CHECK-LABEL: @invoke_with_same_range() +; CHECK: tail call i8 @invoke_with_range() %out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad next: @@ -93,15 +93,15 @@ lpad: } define i8 @call_with_same_range() { -; CHECK-DAG: @call_with_same_range -; CHECK-DAG: tail call i8 @call_with_range +; CHECK-LABEL: @call_with_same_range +; CHECK: tail call i8 @call_with_range %out = call range(i8 0, 2) i8 @dummy() ret i8 %out } define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) { -; CHECK-DAG: @call_with_same_range_attr -; CHECK-DAG: tail call i8 @call_with_range_attr +; CHECK-LABEL: @call_with_same_range_attr +; CHECK: tail call i8 @call_with_range_attr %out = call i8 @dummy2(i8 %v) ret i8 %out } diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll index 39e5a11181a4f03..e7718ca84d31657 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll @@ -64,8 +64,8 @@ lpad: } define i8 @invoke_with_same_range() personality ptr undef { -; CHECK-DAG: @invoke_with_same_range() -; CHECK-DAG: tail call i8 @invoke_with_range() +; CHECK-LABEL: @invoke_with_same_range() +; CHECK: tail call i8 @invoke_with_range() %out = 
invoke i8 @dummy() to label %next unwind label %lpad, !range !0 next: @@ -77,8 +77,8 @@ lpad: } define i8 @call_with_same_range() { -; CHECK-DAG: @call_with_same_range -; CHECK-DAG: tail call i8 @call_with_range +; CHECK-LABEL: @call_with_same_range +; CHECK: tail call i8 @call_with_range bitcast i8 0 to i8 %out = call i8 @dummy(), !range !0 ret i8 %out diff --git a/llvm/test/Transforms/MergeFunc/inline-asm.ll b/llvm/test/Transforms/MergeFunc/inline-asm.ll index 970757e8d53afbb..7cc6afd2f8f7bdc 100644 --- a/llvm/test/Transforms/MergeFunc/inline-asm.ll +++ b/llvm/test/Transforms/MergeFunc/inline-asm.ll @@ -3,11 +3,11 @@ ; CHECK-LABEL: @int_ptr_arg_different ; CHECK-NEXT: call void asm -; CHECK-DAG: @int_ptr_null -; CHECK-DAG: tail call void @float_ptr_null() +; CHECK-LABEL: @int_ptr_null +; CHECK-NEXT: tail call void @float_ptr_null() -; CHECK-DAG: @int_ptr_arg_same -; CHECK-DAG: tail call void @float_ptr_arg_same(ptr %0) +; CHECK-LABEL: @int_ptr_arg_same +; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0) ; Used to satisfy minimum size limit declare void @stuff() From d72e711e864dad7e3a434d66f3febad2b1596335 Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Sat, 26 Oct 2024 17:40:11 -0400 Subject: [PATCH 103/425] [NFC] [CMake] Add -Wno-dangling-else for GCC built unittests (#112817) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is one of the many PRs to fix errors with LLVM_ENABLE_WERROR=on. Built by GCC 11. Fix warnings: llvm/unittests/ProfileData/CoverageMappingTest.cpp: In member function ‘virtual void {anonymous}::CoverageMappingTest_TVIdxBuilder_Test::TestBody()’: llvm/unittests/ProfileData/CoverageMappingTest.cpp:1116:10: error: suggest explicit braces to avoid ambiguous ‘else’ [-Werror=dangling-else] 1116 | if (Node.NextIDs[C] < 0) The problem here is because these macros, eg: EXPECT_TRUE are expanded to a single line multi-statement code with if/else, which is indeed ambiguous after pre-processing. 
A simple example would be: https://godbolt.org/z/4zjn56qrP if(x) switch (0) case 0: default: if (...) ; else return;; Given that omitting braces in such cases is part of LLVM's style guide, and it is hard to add braces in gtest just for GCC's warning, add -Wno-dangling-else for GCC instead. --- llvm/unittests/CMakeLists.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt index 911ede701982f61..8892f3e75729ab3 100644 --- a/llvm/unittests/CMakeLists.txt +++ b/llvm/unittests/CMakeLists.txt @@ -14,6 +14,20 @@ function(add_llvm_target_unittest test_dir_name) add_llvm_unittest(${test_dir_name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) endfunction() +# gtest macros like EXPECT_TRUE are expanded to a single line +# multi-statement code with if/else. eg: +# if (...) +# EXPECT_TRUE(...) +# will be expanded into something like: +# if(...) +# switch (0) case 0: default: if (...) ; else return;; +# GCC may emit false positive dangling-else warnings for such code. +# However, such warnings are actually against LLVM's style guide. +# Disable the warning for GCC so that one can enable Werror. +if (CMAKE_COMPILER_IS_GNUCXX) + list(APPEND LLVM_COMPILE_FLAGS "-Wno-dangling-else") +endif () + add_subdirectory(ADT) add_subdirectory(Analysis) add_subdirectory(AsmParser) From 1941c5180b91d792200d5e868d45c96e99bda35e Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sat, 26 Oct 2024 09:20:26 -0700 Subject: [PATCH 104/425] Reland (2nd attempt) [StructuralHash] Refactor (#112621) This is largely NFC, and it prepares for #112638. - Use stable_hash instead of uint64_t - Rename update* to hash* functions. They compute stable_hash locally and return it. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608. 
--- llvm/include/llvm/IR/StructuralHash.h | 7 +- llvm/lib/IR/StructuralHash.cpp | 133 +++++++++++------- llvm/lib/Transforms/IPO/MergeFunctions.cpp | 6 +- .../call-and-invoke-with-ranges-attr.ll | 12 +- .../MergeFunc/call-and-invoke-with-ranges.ll | 8 +- llvm/test/Transforms/MergeFunc/inline-asm.ll | 8 +- 6 files changed, 106 insertions(+), 68 deletions(-) diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 57fb45db8491109..e2e192cc9501b3a 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/StableHashing.h" #include namespace llvm { @@ -21,20 +22,18 @@ namespace llvm { class Function; class Module; -using IRHash = uint64_t; - /// Returns a hash of the function \p F. /// \param F The function to hash. /// \param DetailedHash Whether or not to encode additional information in the /// hash. The additional information added into the hash when this flag is set /// to true includes instruction and operand type information. -IRHash StructuralHash(const Function &F, bool DetailedHash = false); +stable_hash StructuralHash(const Function &F, bool DetailedHash = false); /// Returns a hash of the module \p M by hashing all functions and global /// variables contained within. \param M The module to hash. \param DetailedHash /// Whether or not to encode additional information in the function hashes that /// composed the module hash. -IRHash StructuralHash(const Module &M, bool DetailedHash = false); +stable_hash StructuralHash(const Module &M, bool DetailedHash = false); } // end namespace llvm diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index fb4f33a021a96bc..267a085c5af7053 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -24,61 +24,93 @@ namespace { // by the MergeFunctions pass. 
class StructuralHashImpl { - uint64_t Hash = 4; + stable_hash Hash = 4; - void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + bool DetailedHash; + + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the opcodes. + static constexpr stable_hash BlockHeaderHash = 45798; + static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; + static constexpr stable_hash GlobalHeaderHash = 23456; // This will produce different values on 32-bit and 64-bit systens as // hash_combine returns a size_t. However, this is only used for // detailed hashing which, in-tree, only needs to distinguish between // differences in functions. - template void hashArbitaryType(const T &V) { - hash(hash_combine(V)); + // TODO: This is not stable. + template stable_hash hashArbitaryType(const T &V) { + return hash_combine(V); } - void hashType(Type *ValueType) { - hash(ValueType->getTypeID()); + stable_hash hashType(Type *ValueType) { + SmallVector Hashes; + Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) - hash(ValueType->getIntegerBitWidth()); + Hashes.emplace_back(ValueType->getIntegerBitWidth()); + return stable_hash_combine(Hashes); } public: - StructuralHashImpl() = default; - - void updateOperand(Value *Operand) { - hashType(Operand->getType()); - - // The cases enumerated below are not exhaustive and are only aimed to - // get decent coverage over the function. 
- if (ConstantInt *ConstInt = dyn_cast(Operand)) { - hashArbitaryType(ConstInt->getValue()); - } else if (ConstantFP *ConstFP = dyn_cast(Operand)) { - hashArbitaryType(ConstFP->getValue()); - } else if (Argument *Arg = dyn_cast(Operand)) { - hash(Arg->getArgNo()); - } else if (Function *Func = dyn_cast(Operand)) { + StructuralHashImpl() = delete; + explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + + stable_hash hashConstant(Constant *C) { + SmallVector Hashes; + // TODO: hashArbitaryType() is not stable. + if (ConstantInt *ConstInt = dyn_cast(C)) { + Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); + } else if (ConstantFP *ConstFP = dyn_cast(C)) { + Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); + } else if (Function *Func = dyn_cast(C)) { // Hashing the name will be deterministic as LLVM's hashing infrastructure // has explicit support for hashing strings and will not simply hash // the pointer. - hashArbitaryType(Func->getName()); + Hashes.emplace_back(hashArbitaryType(Func->getName())); } + + return stable_hash_combine(Hashes); + } + + stable_hash hashValue(Value *V) { + // Check constant and return its hash. + Constant *C = dyn_cast(V); + if (C) + return hashConstant(C); + + // Hash argument number. 
+ SmallVector Hashes; + if (Argument *Arg = dyn_cast(V)) + Hashes.emplace_back(Arg->getArgNo()); + + return stable_hash_combine(Hashes); } - void updateInstruction(const Instruction &Inst, bool DetailedHash) { - hash(Inst.getOpcode()); + stable_hash hashOperand(Value *Operand) { + SmallVector Hashes; + Hashes.emplace_back(hashType(Operand->getType())); + Hashes.emplace_back(hashValue(Operand)); + return stable_hash_combine(Hashes); + } + + stable_hash hashInstruction(const Instruction &Inst) { + SmallVector Hashes; + Hashes.emplace_back(Inst.getOpcode()); if (!DetailedHash) - return; + return stable_hash_combine(Hashes); - hashType(Inst.getType()); + Hashes.emplace_back(hashType(Inst.getType())); // Handle additional properties of specific instructions that cause // semantic differences in the IR. if (const auto *ComparisonInstruction = dyn_cast(&Inst)) - hash(ComparisonInstruction->getPredicate()); + Hashes.emplace_back(ComparisonInstruction->getPredicate()); for (const auto &Op : Inst.operands()) - updateOperand(Op); + Hashes.emplace_back(hashOperand(Op)); + + return stable_hash_combine(Hashes); } // A function hash is calculated by considering only the number of arguments @@ -97,15 +129,17 @@ class StructuralHashImpl { // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. - void update(const Function &F, bool DetailedHash) { + void update(const Function &F) { // Declarations don't affect analyses. 
if (F.isDeclaration()) return; - hash(0x62642d6b6b2d6b72); // Function header + SmallVector Hashes; + Hashes.emplace_back(Hash); + Hashes.emplace_back(FunctionHeaderHash); - hash(F.isVarArg()); - hash(F.arg_size()); + Hashes.emplace_back(F.isVarArg()); + Hashes.emplace_back(F.arg_size()); SmallVector BBs; SmallPtrSet VisitedBBs; @@ -118,17 +152,17 @@ class StructuralHashImpl { while (!BBs.empty()) { const BasicBlock *BB = BBs.pop_back_val(); - // This random value acts as a block header, as otherwise the partition of - // opcodes into BBs wouldn't affect the hash, only the order of the - // opcodes - hash(45798); + Hashes.emplace_back(BlockHeaderHash); for (auto &Inst : *BB) - updateInstruction(Inst, DetailedHash); + Hashes.emplace_back(hashInstruction(Inst)); for (const BasicBlock *Succ : successors(BB)) if (VisitedBBs.insert(Succ).second) BBs.push_back(Succ); } + + // Update the combined hash in place. + Hash = stable_hash_combine(Hashes); } void update(const GlobalVariable &GV) { @@ -137,15 +171,20 @@ class StructuralHashImpl { // we ignore anything with the `.llvm` prefix if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) return; - hash(23456); // Global header - hash(GV.getValueType()->getTypeID()); + SmallVector Hashes; + Hashes.emplace_back(Hash); + Hashes.emplace_back(GlobalHeaderHash); + Hashes.emplace_back(GV.getValueType()->getTypeID()); + + // Update the combined hash in place. 
+ Hash = stable_hash_combine(Hashes); } - void update(const Module &M, bool DetailedHash) { + void update(const Module &M) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F, DetailedHash); + update(F); } uint64_t getHash() const { return Hash; } @@ -153,14 +192,14 @@ class StructuralHashImpl { } // namespace -IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { - StructuralHashImpl H; - H.update(F, DetailedHash); +stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) { + StructuralHashImpl H(DetailedHash); + H.update(F); return H.getHash(); } -IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { - StructuralHashImpl H; - H.update(M, DetailedHash); +stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { + StructuralHashImpl H(DetailedHash); + H.update(M); return H.getHash(); } diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp index b50a700e09038f1..ad16b0b3501495e 100644 --- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp +++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp @@ -172,14 +172,14 @@ namespace { class FunctionNode { mutable AssertingVH F; - IRHash Hash; + stable_hash Hash; public: // Note the hash is recalculated potentially multiple times, but it is cheap. FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {} Function *getFunc() const { return F; } - IRHash getHash() const { return Hash; } + stable_hash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. @@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) { // All functions in the module, ordered by hash. Functions with a unique // hash value are easily eliminated. 
- std::vector> HashedFuncs; + std::vector> HashedFuncs; for (Function &Func : M) { if (isEligibleForMerging(Func)) { HashedFuncs.push_back({StructuralHash(Func), &Func}); diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll index e5d62319bf9db7b..b5f29906b051eed 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll @@ -80,8 +80,8 @@ lpad: } define i8 @invoke_with_same_range() personality ptr undef { -; CHECK-LABEL: @invoke_with_same_range() -; CHECK: tail call i8 @invoke_with_range() +; CHECK-DAG: @invoke_with_same_range() +; CHECK-DAG: tail call i8 @invoke_with_range() %out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad next: @@ -93,15 +93,15 @@ lpad: } define i8 @call_with_same_range() { -; CHECK-LABEL: @call_with_same_range -; CHECK: tail call i8 @call_with_range +; CHECK-DAG: @call_with_same_range() +; CHECK-DAG: tail call i8 @call_with_range() %out = call range(i8 0, 2) i8 @dummy() ret i8 %out } define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) { -; CHECK-LABEL: @call_with_same_range_attr -; CHECK: tail call i8 @call_with_range_attr +; CHECK-DAG: @call_with_same_range_attr +; CHECK-DAG: tail call i8 @call_with_range_attr %out = call i8 @dummy2(i8 %v) ret i8 %out } diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll index e7718ca84d31657..39e5a11181a4f03 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll @@ -64,8 +64,8 @@ lpad: } define i8 @invoke_with_same_range() personality ptr undef { -; CHECK-LABEL: @invoke_with_same_range() -; CHECK: tail call i8 @invoke_with_range() +; CHECK-DAG: @invoke_with_same_range() +; CHECK-DAG: tail call i8 @invoke_with_range() 
%out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0 next: @@ -77,8 +77,8 @@ lpad: } define i8 @call_with_same_range() { -; CHECK-LABEL: @call_with_same_range -; CHECK: tail call i8 @call_with_range +; CHECK-DAG: @call_with_same_range +; CHECK-DAG: tail call i8 @call_with_range bitcast i8 0 to i8 %out = call i8 @dummy(), !range !0 ret i8 %out diff --git a/llvm/test/Transforms/MergeFunc/inline-asm.ll b/llvm/test/Transforms/MergeFunc/inline-asm.ll index 7cc6afd2f8f7bdc..970757e8d53afbb 100644 --- a/llvm/test/Transforms/MergeFunc/inline-asm.ll +++ b/llvm/test/Transforms/MergeFunc/inline-asm.ll @@ -3,11 +3,11 @@ ; CHECK-LABEL: @int_ptr_arg_different ; CHECK-NEXT: call void asm -; CHECK-LABEL: @int_ptr_null -; CHECK-NEXT: tail call void @float_ptr_null() +; CHECK-DAG: @int_ptr_null +; CHECK-DAG: tail call void @float_ptr_null() -; CHECK-LABEL: @int_ptr_arg_same -; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0) +; CHECK-DAG: @int_ptr_arg_same +; CHECK-DAG: tail call void @float_ptr_arg_same(ptr %0) ; Used to satisfy minimum size limit declare void @stuff() From 597ccb800829af69ebc18cd7c75d878c8d21de6e Mon Sep 17 00:00:00 2001 From: Justin Fargnoli Date: Sat, 26 Oct 2024 17:15:19 -0700 Subject: [PATCH 105/425] [NFC] Fix typo in description of `AfterPassInvalidatedCallbacks` (#113779) --- llvm/include/llvm/IR/PassInstrumentation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h index 45ee372e7959d1a..4e65804179ae715 100644 --- a/llvm/include/llvm/IR/PassInstrumentation.h +++ b/llvm/include/llvm/IR/PassInstrumentation.h @@ -182,7 +182,7 @@ class PassInstrumentationCallbacks { BeforeNonSkippedPassCallbacks; /// These are run on passes that have just run. SmallVector, 4> AfterPassCallbacks; - /// These are run passes that have just run on invalidated IR. + /// These are run on passes that have just run on invalidated IR. 
SmallVector, 4> AfterPassInvalidatedCallbacks; /// These are run on analyses that are about to be run. From 89b5d88fb81362b4fb2f833790aa40b7eaa186da Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 26 Oct 2024 19:13:56 -0700 Subject: [PATCH 106/425] [ADT] Use std::string_view inside StringRef (#113775) This patch makes minimum changes to replace Data and Length with an instance of std::string_view. Previously, I opted for public inheritance (#113752), but I encountered a lot of errors from gcc stemming from ambiguity between std::string_view and StringRef. The composition approach in this patch gives us greater control at the expense of forwarder functions. --- llvm/include/llvm/ADT/StringRef.h | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index d5f30b88c4c6a2a..f879bbf7164fd6c 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -60,11 +60,7 @@ namespace llvm { using const_reverse_iterator = std::reverse_iterator; private: - /// The start of the string, in an external buffer. - const char *Data = nullptr; - - /// The length of the string. - size_t Length = 0; + std::string_view View; // Workaround memcmp issue with null pointers (undefined behavior) // by providing a specialized version @@ -86,27 +82,25 @@ namespace llvm { /// Construct a string ref from a cstring. /*implicit*/ constexpr StringRef(const char *Str) - : Data(Str), Length(Str ? + : View(Str, Str ? // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 - __builtin_strlen(Str) + __builtin_strlen(Str) #else - std::char_traits::length(Str) + std::char_traits::length(Str) #endif - : 0) { + : 0) { } /// Construct a string ref from a pointer and length. 
/*implicit*/ constexpr StringRef(const char *data, size_t length) - : Data(data), Length(length) {} + : View(data, length) {} /// Construct a string ref from an std::string. - /*implicit*/ StringRef(const std::string &Str) - : Data(Str.data()), Length(Str.length()) {} + /*implicit*/ StringRef(const std::string &Str) : View(Str) {} /// Construct a string ref from an std::string_view. - /*implicit*/ constexpr StringRef(std::string_view Str) - : Data(Str.data()), Length(Str.size()) {} + /*implicit*/ constexpr StringRef(std::string_view Str) : View(Str) {} /// @} /// @name Iterators @@ -140,13 +134,13 @@ namespace llvm { /// data - Get a pointer to the start of the string (which may not be null /// terminated). - [[nodiscard]] constexpr const char *data() const { return Data; } + [[nodiscard]] constexpr const char *data() const { return View.data(); } /// empty - Check if the string is empty. [[nodiscard]] constexpr bool empty() const { return size() == 0; } /// size - Get the string size. - [[nodiscard]] constexpr size_t size() const { return Length; } + [[nodiscard]] constexpr size_t size() const { return View.size(); } /// front - Get the first character in the string. [[nodiscard]] char front() const { From 242c77018f669c0b8f06b262050fcc4dde486738 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 26 Oct 2024 19:29:49 -0700 Subject: [PATCH 107/425] [ARM] clang-format (NFC) I'm planning to post a patch in this area. 
--- .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 906519fef45db4d..68f1199fd12e148 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5081,23 +5081,23 @@ ParseStatus ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { StringRef OptStr = Tok.getString(); Opt = StringSwitch(OptStr.slice(0, OptStr.size()).lower()) - .Case("sy", ARM_MB::SY) - .Case("st", ARM_MB::ST) - .Case("ld", ARM_MB::LD) - .Case("sh", ARM_MB::ISH) - .Case("ish", ARM_MB::ISH) - .Case("shst", ARM_MB::ISHST) - .Case("ishst", ARM_MB::ISHST) - .Case("ishld", ARM_MB::ISHLD) - .Case("nsh", ARM_MB::NSH) - .Case("un", ARM_MB::NSH) - .Case("nshst", ARM_MB::NSHST) - .Case("nshld", ARM_MB::NSHLD) - .Case("unst", ARM_MB::NSHST) - .Case("osh", ARM_MB::OSH) - .Case("oshst", ARM_MB::OSHST) - .Case("oshld", ARM_MB::OSHLD) - .Default(~0U); + .Case("sy", ARM_MB::SY) + .Case("st", ARM_MB::ST) + .Case("ld", ARM_MB::LD) + .Case("sh", ARM_MB::ISH) + .Case("ish", ARM_MB::ISH) + .Case("shst", ARM_MB::ISHST) + .Case("ishst", ARM_MB::ISHST) + .Case("ishld", ARM_MB::ISHLD) + .Case("nsh", ARM_MB::NSH) + .Case("un", ARM_MB::NSH) + .Case("nshst", ARM_MB::NSHST) + .Case("nshld", ARM_MB::NSHLD) + .Case("unst", ARM_MB::NSHST) + .Case("osh", ARM_MB::OSH) + .Case("oshst", ARM_MB::OSHST) + .Case("oshld", ARM_MB::OSHLD) + .Default(~0U); // ishld, oshld, nshld and ld are only available from ARMv8. 
if (!hasV8Ops() && (Opt == ARM_MB::ISHLD || Opt == ARM_MB::OSHLD || From 0dd9fdcf83cd00f51669b32c96937a97ef4b339e Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sat, 26 Oct 2024 20:02:05 -0700 Subject: [PATCH 108/425] [StructuralHash] Support Differences (#112638) This computes a structural hash while allowing for selective ignoring of certain operands based on a custom function that is provided. Instead of a single hash value, it now returns FunctionHashInfo which includes a hash value, an instruction mapping, and a map to track the operand location and its corresponding hash value that is ignored. Depends on https://github.com/llvm/llvm-project/pull/112621. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608. --- llvm/include/llvm/Analysis/StructuralHash.h | 13 +- llvm/include/llvm/IR/StructuralHash.h | 45 ++++++ llvm/lib/Analysis/StructuralHash.cpp | 27 +++- llvm/lib/IR/StructuralHash.cpp | 153 +++++++++++++++--- llvm/lib/Passes/PassBuilder.cpp | 14 +- llvm/lib/Passes/PassRegistry.def | 7 +- .../StructuralHash/structural-hash-printer.ll | 24 ++- llvm/unittests/IR/StructuralHashTest.cpp | 61 +++++++ 8 files changed, 304 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h index 9f33c69aed345c9..4c9f063bc7d2c8c 100644 --- a/llvm/include/llvm/Analysis/StructuralHash.h +++ b/llvm/include/llvm/Analysis/StructuralHash.h @@ -13,15 +13,22 @@ namespace llvm { +enum class StructuralHashOptions { + None, /// Hash with opcode only. + Detailed, /// Hash with opcode and operands. + CallTargetIgnored, /// Ignore call target operand when computing hash. 
+}; + /// Printer pass for StructuralHashes class StructuralHashPrinterPass : public PassInfoMixin { raw_ostream &OS; - bool EnableDetailedStructuralHash; + const StructuralHashOptions Options; public: - explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed) - : OS(OS), EnableDetailedStructuralHash(Detailed) {} + explicit StructuralHashPrinterPass(raw_ostream &OS, + StructuralHashOptions Options) + : OS(OS), Options(Options) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index e2e192cc9501b3a..071575137ff572e 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,7 +14,9 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StableHashing.h" +#include "llvm/IR/Instruction.h" #include namespace llvm { @@ -35,6 +37,49 @@ stable_hash StructuralHash(const Function &F, bool DetailedHash = false); /// composed the module hash. stable_hash StructuralHash(const Module &M, bool DetailedHash = false); +/// The pair of an instruction index and a operand index. +using IndexPair = std::pair; + +/// A map from an instruction index to an instruction pointer. +using IndexInstrMap = MapVector; + +/// A map from an IndexPair to a stable hash. +using IndexOperandHashMapType = DenseMap; + +/// A function that takes an instruction and an operand index and returns true +/// if the operand should be ignored in the function hash computation. +using IgnoreOperandFunc = std::function; + +struct FunctionHashInfo { + /// A hash value representing the structural content of the function + stable_hash FunctionHash; + /// A mapping from instruction indices to instruction pointers + std::unique_ptr IndexInstruction; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. 
This can be used to analyze or + /// reconstruct the differences in ignored operands + std::unique_ptr IndexOperandHashMap; + + FunctionHashInfo(stable_hash FuntionHash, + std::unique_ptr IndexInstruction, + std::unique_ptr IndexOperandHashMap) + : FunctionHash(FuntionHash), + IndexInstruction(std::move(IndexInstruction)), + IndexOperandHashMap(std::move(IndexOperandHashMap)) {} +}; + +/// Computes a structural hash of a given function, considering the structure +/// and content of the function's instructions while allowing for selective +/// ignoring of certain operands based on custom criteria. This hash can be used +/// to identify functions that are structurally similar or identical, which is +/// useful in optimizations, deduplication, or analysis tasks. +/// \param F The function to hash. +/// \param IgnoreOp A callable that takes an instruction and an operand index, +/// and returns true if the operand should be ignored in the hash computation. +/// \return A FunctionHashInfo structure +FunctionHashInfo StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp); + } // end namespace llvm #endif diff --git a/llvm/lib/Analysis/StructuralHash.cpp b/llvm/lib/Analysis/StructuralHash.cpp index 3a2341fe59ad9ce..4f2e003148b6065 100644 --- a/llvm/lib/Analysis/StructuralHash.cpp +++ b/llvm/lib/Analysis/StructuralHash.cpp @@ -21,14 +21,33 @@ using namespace llvm; PreservedAnalyses StructuralHashPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { OS << "Module Hash: " - << format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash)) + << format("%016" PRIx64, + StructuralHash(M, Options != StructuralHashOptions::None)) << "\n"; for (Function &F : M) { if (F.isDeclaration()) continue; - OS << "Function " << F.getName() << " Hash: " - << format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash)) - << "\n"; + if (Options == StructuralHashOptions::CallTargetIgnored) { + auto IgnoreOp = [&](const Instruction *I, unsigned 
OpndIdx) { + return I->getOpcode() == Instruction::Call && + isa(I->getOperand(OpndIdx)); + }; + auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp); + OS << "Function " << F.getName() + << " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash) + << "\n"; + for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) { + auto [InstIndex, OpndIndex] = IndexPair; + OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash) + << " at (" << InstIndex << "," << OpndIndex << ")\n"; + } + } else { + OS << "Function " << F.getName() << " Hash: " + << format( + "%016" PRIx64, + StructuralHash(F, Options == StructuralHashOptions::Detailed)) + << "\n"; + } } return PreservedAnalyses::all(); } diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 267a085c5af7053..a51f9124af04dad 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -34,14 +34,18 @@ class StructuralHashImpl { static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; static constexpr stable_hash GlobalHeaderHash = 23456; - // This will produce different values on 32-bit and 64-bit systens as - // hash_combine returns a size_t. However, this is only used for - // detailed hashing which, in-tree, only needs to distinguish between - // differences in functions. - // TODO: This is not stable. - template stable_hash hashArbitaryType(const T &V) { - return hash_combine(V); - } + /// IgnoreOp is a function that returns true if the operand should be ignored. + IgnoreOperandFunc IgnoreOp = nullptr; + /// A mapping from instruction indices to instruction pointers. + /// The index represents the position of an instruction based on the order in + /// which it is first encountered. + std::unique_ptr IndexInstruction = nullptr; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. 
+ std::unique_ptr IndexOperandHashMap = nullptr; + + /// Assign a unique ID to each Value in the order they are first seen. + DenseMap ValueToId; stable_hash hashType(Type *ValueType) { SmallVector Hashes; @@ -53,23 +57,95 @@ class StructuralHashImpl { public: StructuralHashImpl() = delete; - explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + explicit StructuralHashImpl(bool DetailedHash, + IgnoreOperandFunc IgnoreOp = nullptr) + : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { + if (IgnoreOp) { + IndexInstruction = std::make_unique(); + IndexOperandHashMap = std::make_unique(); + } + } + + stable_hash hashAPInt(const APInt &I) { + SmallVector Hashes; + Hashes.emplace_back(I.getBitWidth()); + auto RawVals = ArrayRef(I.getRawData(), I.getNumWords()); + Hashes.append(RawVals.begin(), RawVals.end()); + return stable_hash_combine(Hashes); + } + + stable_hash hashAPFloat(const APFloat &F) { + return hashAPInt(F.bitcastToAPInt()); + } + + stable_hash hashGlobalValue(const GlobalValue *GV) { + if (!GV->hasName()) + return 0; + return stable_hash_name(GV->getName()); + } + // Compute a hash for a Constant. This function is logically similar to + // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here + // we're interested in computing a hash rather than comparing two Constants. + // Some of the logic is simplified, e.g, we don't expand GEPOperator. stable_hash hashConstant(Constant *C) { SmallVector Hashes; - // TODO: hashArbitaryType() is not stable. - if (ConstantInt *ConstInt = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); - } else if (ConstantFP *ConstFP = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); - } else if (Function *Func = dyn_cast(C)) { - // Hashing the name will be deterministic as LLVM's hashing infrastructure - // has explicit support for hashing strings and will not simply hash - // the pointer. 
- Hashes.emplace_back(hashArbitaryType(Func->getName())); + + Type *Ty = C->getType(); + Hashes.emplace_back(hashType(Ty)); + + if (C->isNullValue()) { + Hashes.emplace_back(static_cast('N')); + return stable_hash_combine(Hashes); } - return stable_hash_combine(Hashes); + if (auto *G = dyn_cast(C)) { + Hashes.emplace_back(hashGlobalValue(G)); + return stable_hash_combine(Hashes); + } + + if (const auto *Seq = dyn_cast(C)) { + Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); + return stable_hash_combine(Hashes); + } + + switch (C->getValueID()) { + case Value::ConstantIntVal: { + const APInt &Int = cast(C)->getValue(); + Hashes.emplace_back(hashAPInt(Int)); + return stable_hash_combine(Hashes); + } + case Value::ConstantFPVal: { + const APFloat &APF = cast(C)->getValueAPF(); + Hashes.emplace_back(hashAPFloat(APF)); + return stable_hash_combine(Hashes); + } + case Value::ConstantArrayVal: + case Value::ConstantStructVal: + case Value::ConstantVectorVal: + case Value::ConstantExprVal: { + for (const auto &Op : C->operands()) { + auto H = hashConstant(cast(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::BlockAddressVal: { + const BlockAddress *BA = cast(C); + auto H = hashGlobalValue(BA->getFunction()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + case Value::DSOLocalEquivalentVal: { + const auto *Equiv = cast(C); + auto H = hashGlobalValue(Equiv->getGlobalValue()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + default: + // Skip other types of constants for simplicity. + return stable_hash_combine(Hashes); + } } stable_hash hashValue(Value *V) { @@ -83,6 +159,10 @@ class StructuralHashImpl { if (Argument *Arg = dyn_cast(V)) Hashes.emplace_back(Arg->getArgNo()); + // Get an index (an insertion order) for the non-constant value. 
+ auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size()); + Hashes.emplace_back(It->second); + return stable_hash_combine(Hashes); } @@ -107,8 +187,20 @@ class StructuralHashImpl { if (const auto *ComparisonInstruction = dyn_cast(&Inst)) Hashes.emplace_back(ComparisonInstruction->getPredicate()); - for (const auto &Op : Inst.operands()) - Hashes.emplace_back(hashOperand(Op)); + unsigned InstIdx = 0; + if (IndexInstruction) { + InstIdx = IndexInstruction->size(); + IndexInstruction->try_emplace(InstIdx, const_cast(&Inst)); + } + + for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { + auto OpndHash = hashOperand(Op); + if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { + assert(IndexOperandHashMap); + IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash); + } else + Hashes.emplace_back(OpndHash); + } return stable_hash_combine(Hashes); } @@ -188,6 +280,14 @@ class StructuralHashImpl { } uint64_t getHash() const { return Hash; } + + std::unique_ptr getIndexInstrMap() { + return std::move(IndexInstruction); + } + + std::unique_ptr getIndexPairOpndHashMap() { + return std::move(IndexOperandHashMap); + } }; } // namespace @@ -203,3 +303,12 @@ stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { H.update(M); return H.getHash(); } + +FunctionHashInfo +llvm::StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp) { + StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); + H.update(F); + return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), + H.getIndexPairOpndHashMap()); +} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f5ce405ab8d9616..d1f75dfb5350a0c 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1175,9 +1175,17 @@ Expected parseMemProfUsePassOptions(StringRef Params) { return Result; } -Expected parseStructuralHashPrinterPassOptions(StringRef Params) { - return PassBuilder::parseSinglePassOption(Params, "detailed", - 
"StructuralHashPrinterPass"); +Expected +parseStructuralHashPrinterPassOptions(StringRef Params) { + if (Params.empty()) + return StructuralHashOptions::None; + if (Params == "detailed") + return StructuralHashOptions::Detailed; + if (Params == "call-target-ignored") + return StructuralHashOptions::CallTargetIgnored; + return make_error( + formatv("invalid structural hash printer parameter '{0}' ", Params).str(), + inconvertibleErrorCode()); } Expected parseWinEHPrepareOptions(StringRef Params) { diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 549c1359b5852ce..017ae311c55eb40 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS( parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N") MODULE_PASS_WITH_PARAMS( "print", "StructuralHashPrinterPass", - [](bool EnableDetailedStructuralHash) { - return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash); + [](StructuralHashOptions Options) { + return StructuralHashPrinterPass(dbgs(), Options); }, - parseStructuralHashPrinterPassOptions, "detailed") + parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored") + #undef MODULE_PASS_WITH_PARAMS #ifndef CGSCC_ANALYSIS diff --git a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll index 5936199bf32f437..3c23b54d2973697 100644 --- a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll +++ b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll @@ -1,17 +1,21 @@ ; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH ; Add a declaration so that we can test we skip it. 
-declare i64 @d1() +declare i64 @d1(i64) +declare i64 @e1(i64) define i64 @f1(i64 %a) { %b = add i64 %a, 1 - ret i64 %b + %c = call i64 @d1(i64 %b) + ret i64 %c } -define i32 @f2(i32 %a) { - %b = add i32 %a, 2 - ret i32 %b +define i64 @f2(i64 %a) { + %b = add i64 %a, 1 + %c = call i64 @e1(i64 %b) + ret i64 %c } ; CHECK: Module Hash: {{([a-f0-9]{16,})}} @@ -22,3 +26,13 @@ define i32 @f2(i32 %a) { ; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]] ; DETAILED-HASH-NOT: [[DF1H]] ; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}} + +; When ignoring the call target, check if `f1` and `f2` produce the same function hash. +; The index for the call instruction is 1, and the index of the call target operand is 1. +; The ignored operand hashes for different call targets should be different. +; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}} +; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]] +; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1) +; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]] +; CALLTARGETIGNORED-HASH-NOT: [[IO1H]] +; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1) diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp index 64e66aa5f97a6d0..81c17120a1f6fff 100644 --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -10,6 +10,7 @@ #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Module.h" #include "llvm/Support/SourceMgr.h" +#include "gmock/gmock-matchers.h" #include "gtest/gtest.h" #include @@ -18,6 +19,11 @@ using namespace llvm; namespace { +using testing::Contains; +using testing::Key; +using testing::Pair; +using testing::SizeIs; + std::unique_ptr parseIR(LLVMContext &Context, const char *IR) { SMDiagnostic Err; std::unique_ptr M = parseAssemblyString(IR, Err, Context); @@ -239,4 +245,59 @@ TEST(StructuralHashTest, ArgumentNumber) { 
EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } + +TEST(StructuralHashTest, Differences) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f1(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f1(i64)"); + auto *F1 = M1->getFunction("f"); + std::unique_ptr M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f2(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f2(i64)"); + auto *F2 = M2->getFunction("g"); + + // They are originally different when not ignoring any operand. + EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true)); + EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash, + StructuralHashWithDifferences(*F2, nullptr).FunctionHash); + + // When we ignore the call target f1 vs f2, they have the same hash. + auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) { + return I->getOpcode() == Instruction::Call && OpndIdx == 1; + }; + auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp); + auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp); + EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash); + + // There are total 3 instructions. + EXPECT_THAT(*FuncHashInfo1.IndexInstruction, SizeIs(3)); + EXPECT_THAT(*FuncHashInfo2.IndexInstruction, SizeIs(3)); + + // The only 1 operand (the call target) has been ignored. + EXPECT_THAT(*FuncHashInfo1.IndexOperandHashMap, SizeIs(1u)); + EXPECT_THAT(*FuncHashInfo2.IndexOperandHashMap, SizeIs(1u)); + + // The index pair of instruction and operand (1, 1) is a key in the map. + ASSERT_THAT(*FuncHashInfo1.IndexOperandHashMap, Contains(Key(Pair(1, 1)))); + ASSERT_THAT(*FuncHashInfo2.IndexOperandHashMap, Contains(Key(Pair(1, 1)))); + + // The indexed instruciton must be the call instruction as shown in the + // IgnoreOp above. 
+ EXPECT_EQ(FuncHashInfo1.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + EXPECT_EQ(FuncHashInfo2.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + + // The ignored operand hashes (for f1 vs. f2) are different. + EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}), + FuncHashInfo2.IndexOperandHashMap->lookup({1, 1})); +} + } // end anonymous namespace From 93da6423af5f00a3bbee4d2ee571ccc7887f444d Mon Sep 17 00:00:00 2001 From: Sirui Mu Date: Sun, 27 Oct 2024 11:52:00 +0800 Subject: [PATCH 109/425] [mlir][LLVM] Add builders for llvm.intr.assume (#113317) This patch adds several new builders for llvm.intr.assume that build the operation with additional operand bundles. --- .../include/mlir/Dialect/LLVMIR/LLVMDialect.h | 8 ++++ .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 9 ++++- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 39 ++++++++++++++++++- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h index d236cae0d80882c..63e007cdc335cc1 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h @@ -33,6 +33,7 @@ #include "mlir/Support/ThreadLocalCache.h" #include "llvm/ADT/PointerEmbeddedInt.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -87,6 +88,13 @@ class GEPArg : public PointerUnion { } // namespace LLVM } // namespace mlir +namespace mlir { +namespace LLVM { +struct AssumeAlignTag {}; +struct AssumeSeparateStorageTag {}; +} // namespace LLVM +} // namespace mlir + ///// Ops ///// #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/LLVMOps.h.inc" diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index 845c88b1be77509..d07ebbacc604346 100644 --- 
a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -450,7 +450,14 @@ def LLVM_AssumeOp }]; let builders = [ - OpBuilder<(ins "Value":$cond)> + OpBuilder<(ins "Value":$cond)>, + OpBuilder<(ins "Value":$cond, + "ArrayRef>":$opBundles)>, + OpBuilder<(ins "Value":$cond, "llvm::StringRef":$tag, "ValueRange":$args)>, + OpBuilder<(ins "Value":$cond, "AssumeAlignTag":$tag, "Value":$ptr, + "Value":$align)>, + OpBuilder<(ins "Value":$cond, "AssumeSeparateStorageTag":$tag, + "Value":$ptr1, "Value":$ptr2)> ]; let hasVerifier = 1; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index cc73878a64ff67e..c9bc9533ca2a6bd 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -3438,7 +3438,44 @@ void InlineAsmOp::getEffects( void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state, mlir::Value cond) { return build(builder, state, cond, /*op_bundle_operands=*/{}, - /*op_bundle_tags=*/{}); + /*op_bundle_tags=*/ArrayAttr{}); +} + +void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state, + Value cond, + ArrayRef> opBundles) { + SmallVector opBundleOperands; + SmallVector opBundleTags; + opBundleOperands.reserve(opBundles.size()); + opBundleTags.reserve(opBundles.size()); + + for (const llvm::OperandBundleDefT &bundle : opBundles) { + opBundleOperands.emplace_back(bundle.inputs()); + opBundleTags.push_back( + StringAttr::get(builder.getContext(), bundle.getTag())); + } + + auto opBundleTagsAttr = ArrayAttr::get(builder.getContext(), opBundleTags); + return build(builder, state, cond, opBundleOperands, opBundleTagsAttr); +} + +void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state, + Value cond, llvm::StringRef tag, ValueRange args) { + llvm::OperandBundleDefT opBundle( + tag.str(), SmallVector(args.begin(), args.end())); + return build(builder, state, cond, opBundle); +} + +void 
LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state, + Value cond, AssumeAlignTag, Value ptr, Value align) { + return build(builder, state, cond, "align", ValueRange{ptr, align}); +} + +void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state, + Value cond, AssumeSeparateStorageTag, Value ptr1, + Value ptr2) { + return build(builder, state, cond, "separate_storage", + ValueRange{ptr1, ptr2}); } LogicalResult LLVM::AssumeOp::verify() { return verifyOperandBundles(*this); } From 355e6948d44a97781cc184a22c9b51760cae6de0 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Sat, 26 Oct 2024 20:53:20 -0700 Subject: [PATCH 110/425] [MemProf] Fix clone edge comparison (#113753) The issue fixed in PR113337 exposed a bug in the comparisons done in allocTypesMatch, which compares a vector of alloc types to those in the given vector of Edges. The form of std::equal used, which didn't provide the end iterator for the Edges vector, will iterate through as many entries in the Edges vector as in the InAllocTypes vector, which can fail if there are fewer entries in the Edges vector, because we may dereference a bogus Edge pointer. This function is called twice, once for the Node, with its callee edges, in which case the number of edges should always match the number of entries in allocTypesMatch, which is computed from the Node's callee edges. It was also called for Node's clones, and it turns out that after cloning and edge modifications done for other allocations, the number of callee edges in Node and its clones may no longer match. In some cases, more common with memprof ICP before the PR113337, the number of clone edges can be smaller leading to a bad dereference. I found for a large application even before adding memprof ICP support we sometimes call this with fewer entries in the clone's callee edges, but were getting lucky as they had allocation type None, and we didn't end up attempting to dereference the bad edge pointer. 
Fix this by passing Edges.end() to std::equal, which means std::equal will return false if the two vectors do not have the same number of entries. However, this is too conservative, as clone edges may have been added or removed since the clone was initially created, and in fact it can be wrong, as we may not be comparing allocation types corresponding to the same callee. Therefore, a couple of enhancements are made to avoid regressing and to improve the checking and cloning: - Don't bother calling the alloc type comparison when the clone's alloc type and the Node's alloc type for the current allocation are both precise (each has a single allocation type) and are the same (which is guaranteed by an earlier check, and an assert is added to confirm that). In that case we can trivially determine that the clone can be used. - Split the alloc type matching handling into a separate function for the clone case. In that case, for each of the InAllocTypes entries, attempt to find, and compare against, the clone callee edge with the same callee as the corresponding original node callee. To create a test case I needed to take a SPEC application (xalancbmk), and repeatedly apply random hot/cold-ness to the memprof contexts when building, until I hit the problematic case. I then reduced the full LTO IR, first using llvm-reduce and then manually.
--- .../IPO/MemProfContextDisambiguation.cpp | 66 +++++++++++-- .../fix_clone_checking.ll | 99 +++++++++++++++++++ 2 files changed, 159 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 905186edcbecc40..da5ded23ecc0453 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -928,8 +928,11 @@ bool allocTypesMatch( const std::vector &InAllocTypes, const std::vector>> &Edges) { + // This should be called only when the InAllocTypes vector was computed for + // this set of Edges. Make sure the sizes are the same. + assert(InAllocTypes.size() == Edges.size()); return std::equal( - InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), + InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(), [](const uint8_t &l, const std::shared_ptr> &r) { // Can share if one of the edges is None type - don't @@ -942,6 +945,46 @@ bool allocTypesMatch( }); } +// Helper to check if the alloc types for all edges recorded in the +// InAllocTypes vector match the alloc types for callee edges in the given +// clone. Because the InAllocTypes were computed from the original node's callee +// edges, and other cloning could have happened after this clone was created, we +// need to find the matching clone callee edge, which may or may not exist. +template +bool allocTypesMatchClone( + const std::vector &InAllocTypes, + const ContextNode *Clone) { + const ContextNode *Node = Clone->CloneOf; + assert(Node); + // InAllocTypes should have been computed for the original node's callee + // edges. + assert(InAllocTypes.size() == Node->CalleeEdges.size()); + // First create a map of the clone callee edge callees to the edge alloc type. 
+ DenseMap *, uint8_t> + EdgeCalleeMap; + for (const auto &E : Clone->CalleeEdges) { + assert(!EdgeCalleeMap.contains(E->Callee)); + EdgeCalleeMap[E->Callee] = E->AllocTypes; + } + // Next, walk the original node's callees, and look for the corresponding + // clone edge to that callee. + for (unsigned I = 0; I < Node->CalleeEdges.size(); I++) { + auto Iter = EdgeCalleeMap.find(Node->CalleeEdges[I]->Callee); + // Not found is ok, we will simply add an edge if we use this clone. + if (Iter == EdgeCalleeMap.end()) + continue; + // Can share if one of the edges is None type - don't + // care about the type along that edge as it doesn't + // exist for those context ids. + if (InAllocTypes[I] == (uint8_t)AllocationType::None || + Iter->second == (uint8_t)AllocationType::None) + continue; + if (allocTypeToUse(Iter->second) != allocTypeToUse(InAllocTypes[I])) + return false; + } + return true; +} + } // end anonymous namespace template @@ -3364,11 +3407,22 @@ void CallsiteContextGraph::identifyClones( allocTypeToUse(CallerAllocTypeForAlloc)) continue; - if (!allocTypesMatch( - CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges)) - continue; - Clone = CurClone; - break; + bool BothSingleAlloc = hasSingleAllocType(CurClone->AllocTypes) && + hasSingleAllocType(CallerAllocTypeForAlloc); + // The above check should mean that if both have single alloc types that + // they should be equal. + assert(!BothSingleAlloc || + CurClone->AllocTypes == CallerAllocTypeForAlloc); + + // If either both have a single alloc type (which are the same), or if the + // clone's callee edges have the same alloc types as those for the current + // allocation on Node's callee edges (CalleeEdgeAllocTypesForCallerEdge), + // then we can reuse this clone. + if (BothSingleAlloc || allocTypesMatchClone( + CalleeEdgeAllocTypesForCallerEdge, CurClone)) { + Clone = CurClone; + break; + } } // The edge iterator is adjusted when we move the CallerEdge to the clone. 
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll new file mode 100644 index 000000000000000..75cebae0b82971a --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll @@ -0,0 +1,99 @@ +;; Test to make sure we don't fail when cloning in a case where we end up with +;; a clone that has fewer edges than the node it was initially cloned from. +;; This test was reduced and simplified from xalancbmk with some random hotness +;; applied to the profile that reproduced the issue. + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes \ +; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s + +;; Make sure we created some clones +; CHECK: created clone A.memprof.1 +; CHECK: created clone C.memprof.1 +; CHECK: created clone D.memprof.1 +; CHECK: created clone E.memprof.1 +; CHECK: created clone B.memprof.1 +; CHECK: created clone F.memprof.1 +; CHECK: created clone G.memprof.1 + +; ModuleID = '' +source_filename = "reduced.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +define void @A() { + call void @B(), !callsite !0 + ret void +} + +define void @C() { + call void @D(), !callsite !1 + ret void +} + +define void @D() { + call void @A(), !callsite !2 + ret void +} + +define void @E() { + %1 = call ptr @_Znwm(i64 0), !memprof !3, !callsite !20 + ret void +} + +define void @B() { + call void @F(), !callsite !21 + ret void +} + +define void @F() { + call void @E(), !callsite !22 + call void @G(), !callsite !23 + ret void +} + +define void @G() { + %1 = call ptr @_Znwm(i64 0), !memprof !24, !callsite !37 + ret void +} + +declare ptr @_Znwm(i64) + +!0 = !{i64 1995602625719775354} +!1 = !{i64 4312698517630782220} +!2 = !{i64 
5516454029445989383} +!3 = !{!4, !6, !8, !10, !12, !14, !16, !18} +!4 = !{!5, !"notcold"} +!5 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 1995602625719775354} +!6 = !{!7, !"cold"} +!7 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 2077908580042347045, i64 4312698517630782220, i64 5379466077518675850} +!8 = !{!9, !"cold"} +!9 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 2077908580042347045, i64 4312698517630782220, i64 -7632894069000375689} +!10 = !{!11, !"cold"} +!11 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 2939944783060497247} +!12 = !{!13, !"notcold"} +!13 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 5642549674080861567, i64 5516454029445989383, i64 4312698517630782220, i64 -7632894069000375689} +!14 = !{!15, !"cold"} +!15 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 5642549674080861567, i64 5516454029445989383, i64 4312698517630782220, i64 -1805555115991223293} +!16 = !{!17, !"notcold"} +!17 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 -4746997736434041076, i64 5516454029445989383, i64 4312698517630782220, i64 -1805555115991223293} +!18 = !{!19, !"notcold"} +!19 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 -4637272929643682959} +!20 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862} +!21 = !{i64 
-6456074186195384663} +!22 = !{i64 7147584705143805656} +!23 = !{i64 3938822378769440754} +!24 = !{!25, !27, !29, !31, !33, !35} +!25 = !{!26, !"cold"} +!26 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 1995602625719775354, i64 5516454029445989383, i64 4312698517630782220, i64 -1805555115991223293} +!27 = !{!28, !"notcold"} +!28 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 2077908580042347045, i64 4312698517630782220, i64 -7632894069000375689} +!29 = !{!30, !"cold"} +!30 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 -4746997736434041076, i64 5516454029445989383, i64 4312698517630782220, i64 -7632894069000375689} +!31 = !{!32, !"notcold"} +!32 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 -4746997736434041076, i64 5516454029445989383, i64 4312698517630782220, i64 -1805555115991223293} +!33 = !{!34, !"cold"} +!34 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 -4637272929643682959} +!35 = !{!36, !"notcold"} +!36 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 -4409412896859835674} +!37 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196} From d5b42db00f0b21855501b01e8cd80326e1ce763d Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Sun, 27 Oct 2024 04:56:53 +0100 Subject: [PATCH 111/425] [clang][bytecode][NFC] Only do CheckConstant checks for global pointers (#113786) We can check isStatic() early here and save ourselves some work. 
--- clang/lib/AST/ByteCode/Interp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index b7a6c224c80f8e9..6d40fb03696d48b 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -400,7 +400,7 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { } static bool CheckConstant(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { - if (!Ptr.isBlockPointer()) + if (!Ptr.isStatic() || !Ptr.isBlockPointer()) return true; return CheckConstant(S, OpPC, Ptr.getDeclDesc()); } From 7b88e7530d4329ff0c7c8638f69b39fa1e540218 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Sun, 27 Oct 2024 05:06:47 +0100 Subject: [PATCH 112/425] [clang][bytecode][NFC] Make CheckVolatile static (#113785) --- clang/lib/AST/ByteCode/Interp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 6d40fb03696d48b..6e45cfb7e8a20ce 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -513,8 +513,8 @@ bool CheckMutable(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { return false; } -bool CheckVolatile(InterpState &S, CodePtr OpPC, const Pointer &Ptr, - AccessKinds AK) { +static bool CheckVolatile(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK) { assert(Ptr.isLive()); // FIXME: This check here might be kinda expensive. Maybe it would be better From 60d2feded5c0f55b21d042ee2f35227847d66ee0 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 26 Oct 2024 22:07:56 -0700 Subject: [PATCH 113/425] [ARM] Remove a redundant call to StringRef::slice (NFC) (#113783) OptStr.slice(0, OptStr.size()) is exactly the same as OptStr. 
--- llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 68f1199fd12e148..0df1c336a221462 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5080,7 +5080,7 @@ ParseStatus ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { if (Tok.is(AsmToken::Identifier)) { StringRef OptStr = Tok.getString(); - Opt = StringSwitch(OptStr.slice(0, OptStr.size()).lower()) + Opt = StringSwitch(OptStr.lower()) .Case("sy", ARM_MB::SY) .Case("st", ARM_MB::ST) .Case("ld", ARM_MB::LD) From d2e9532fe12dc2568f40c2648ff4bb3730141aed Mon Sep 17 00:00:00 2001 From: Eirik Byrkjeflot Anonsen Date: Sun, 27 Oct 2024 10:09:39 +0100 Subject: [PATCH 114/425] [DemoteRegToStack] Use correct variable for branch instructions in DemoteRegToStack (#113798) I happened to see this code, and it seems "obviously" wrong to me. So here's what I think this code is supposed to look like. 
--- llvm/lib/Transforms/Utils/DemoteRegToStack.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp index 3a33b591d355826..6337913cdbbeb77 100644 --- a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -55,8 +55,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, for (unsigned i = 0; i < CBI->getNumSuccessors(); i++) { auto *Succ = CBI->getSuccessor(i); if (!Succ->getSinglePredecessor()) { - assert(isCriticalEdge(II, i) && "Expected a critical edge!"); - [[maybe_unused]] BasicBlock *BB = SplitCriticalEdge(II, i); + assert(isCriticalEdge(CBI, i) && "Expected a critical edge!"); + [[maybe_unused]] BasicBlock *BB = SplitCriticalEdge(CBI, i); assert(BB && "Unable to split critical edge."); } } From 45c84d59c454ba2b57affcd09a74f8d91e93bff7 Mon Sep 17 00:00:00 2001 From: goldsteinn <35538541+goldsteinn@users.noreply.github.com> Date: Sun, 27 Oct 2024 06:49:20 -0700 Subject: [PATCH 115/425] [libc] Add `__builtin_expect` tag on assert conditions; NFC (#99498) --- libc/src/__support/CMakeLists.txt | 1 + libc/src/__support/libc_assert.h | 5 +++-- libc/src/assert/assert.h | 16 +++++++++++++--- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 4785895b562b5e3..14a3acff8fae935 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -277,6 +277,7 @@ add_header_library( DEPENDS .integer_to_string libc.src.__support.OSUtil.osutil + libc.src.__support.macros.optimization ) add_header_library( diff --git a/libc/src/__support/libc_assert.h b/libc/src/__support/libc_assert.h index e21a58a0c8aad94..3db179ff6721248 100644 --- a/libc/src/__support/libc_assert.h +++ b/libc/src/__support/libc_assert.h @@ -24,7 +24,8 @@ #include "src/__support/OSUtil/exit.h" #include 
"src/__support/OSUtil/io.h" #include "src/__support/integer_to_string.h" -#include "src/__support/macros/attributes.h" // For LIBC_INLINE +#include "src/__support/macros/attributes.h" // For LIBC_INLINE +#include "src/__support/macros/optimization.h" // For LIBC_UNLIKELY namespace LIBC_NAMESPACE_DECL { @@ -71,7 +72,7 @@ LIBC_INLINE void report_assertion_failure(const char *assertion, #define LIBC_ASSERT(COND) \ do { \ - if (!(COND)) { \ + if (LIBC_UNLIKELY(!(COND))) { \ LIBC_NAMESPACE::write_to_stderr(__FILE__ ":" __LIBC_LINE_STR__ \ ": Assertion failed: '" #COND \ "' in function: '"); \ diff --git a/libc/src/assert/assert.h b/libc/src/assert/assert.h index 6f352af1988b371..1ea19ea5554f0aa 100644 --- a/libc/src/assert/assert.h +++ b/libc/src/assert/assert.h @@ -18,8 +18,18 @@ #ifdef NDEBUG #define assert(e) (void)0 #else + +#ifdef __has_builtin +#if __has_builtin(__builtin_expect) +#define __LIBC_ASSERT_LIKELY(e) __builtin_expect(e, 1) +#endif +#endif +#ifndef __LIBC_ASSERT_LIKELY +#define __LIBC_ASSERT_LIKELY(e) e +#endif + #define assert(e) \ - ((e) ? (void)0 \ - : LIBC_NAMESPACE::__assert_fail(#e, __FILE__, __LINE__, \ - __PRETTY_FUNCTION__)) + (__LIBC_ASSERT_LIKELY(e) ? (void)0 \ + : LIBC_NAMESPACE::__assert_fail( \ + #e, __FILE__, __LINE__, __PRETTY_FUNCTION__)) #endif // NDEBUG From 5287a9b3456fe7aefa24c8da95ef265b8dba875b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sun, 27 Oct 2024 07:28:27 -0700 Subject: [PATCH 116/425] [mlir] Prefer StringRef::substr to slice (NFC) (#113788) I'm planning to migrate StringRef to std::string_view eventually. 
Since std::string_view does not have slice, this patch migrates: slice(0, N) to substr(0, N) slice(N, StringRef::npos) to substr(N) --- mlir/include/mlir/Support/IndentedOstream.h | 3 +-- mlir/lib/Query/QueryParser.cpp | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Support/IndentedOstream.h b/mlir/include/mlir/Support/IndentedOstream.h index 101aa8b631d299d..eeab36806c4ee70 100644 --- a/mlir/include/mlir/Support/IndentedOstream.h +++ b/mlir/include/mlir/Support/IndentedOstream.h @@ -166,8 +166,7 @@ inline void mlir::raw_indented_ostream::write_impl(const char *ptr, break; } - auto split = - std::make_pair(str.slice(0, idx), str.slice(idx + 1, StringRef::npos)); + auto split = std::make_pair(str.substr(0, idx), str.substr(idx + 1)); // Print empty new line without spaces if line only has spaces and no extra // prefix is requested. if (!split.first.ltrim().empty() || !currentExtraPrefix.empty()) diff --git a/mlir/lib/Query/QueryParser.cpp b/mlir/lib/Query/QueryParser.cpp index 13ee931cc5227fe..31aead7d403d0df 100644 --- a/mlir/lib/Query/QueryParser.cpp +++ b/mlir/lib/Query/QueryParser.cpp @@ -181,8 +181,8 @@ QueryRef QueryParser::doParse() { if (!matcher) { return makeInvalidQueryFromDiagnostics(diag); } - auto actualSource = origMatcherSource.slice(0, origMatcherSource.size() - - matcherSource.size()); + auto actualSource = origMatcherSource.substr(0, origMatcherSource.size() - + matcherSource.size()); QueryRef query = new MatchQuery(actualSource, *matcher); query->remainingContent = matcherSource; return query; From 7fe149cdf09d04fb8390b97c91bd9214c968cd3e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sun, 27 Oct 2024 16:22:18 +0100 Subject: [PATCH 117/425] [VPlan] Replace getIRBasicBlock with IRBB in VPIRBB::execute (NFC). Suggested in https://github.com/llvm/llvm-project/pull/109975. This makes the function consistent throughout. 
--- llvm/lib/Transforms/Vectorize/VPlan.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 44ffcb954a28421..0484543d2d0398c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -453,13 +453,13 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { void VPIRBasicBlock::execute(VPTransformState *State) { assert(getHierarchicalSuccessors().size() <= 2 && "VPIRBasicBlock can have at most two successors at the moment!"); - State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator()); - executeRecipes(State, getIRBasicBlock()); + State->Builder.SetInsertPoint(IRBB->getTerminator()); + executeRecipes(State, IRBB); if (getSingleSuccessor()) { - assert(isa(getIRBasicBlock()->getTerminator())); - auto *Br = State->Builder.CreateBr(getIRBasicBlock()); + assert(isa(IRBB->getTerminator())); + auto *Br = State->Builder.CreateBr(IRBB); Br->setOperand(0, nullptr); - getIRBasicBlock()->getTerminator()->eraseFromParent(); + IRBB->getTerminator()->eraseFromParent(); } for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { From e33aec89ef1378d80e8df2e965ac5e6e6aa2e3de Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Sun, 27 Oct 2024 22:24:31 +0530 Subject: [PATCH 118/425] [MLIR][NVVM] Update the elect.sync Op to use intrinsics (#113757) Recently, we added an intrinsic for the elect.sync PTX instruction (PR 104780). This patch updates the corresponding Op in NVVM Dialect to lower to the intrinsic instead of inline-ptx. The existing test under Conversion/ is migrated to check for the new pattern. A separate test is added to verify the lowered intrinsic under the Target/ directory. 
Signed-off-by: Durgadoss R --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 33 ++++++++++--------- .../Conversion/NVVMToLLVM/nvvm-to-llvm.mlir | 8 +---- mlir/test/Target/LLVMIR/nvvmir.mlir | 9 +++++ 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 5806295cedb198c..7cb4b5c346ad972 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -783,24 +783,27 @@ def NVVM_SyncWarpOp : let assemblyFormat = "$mask attr-dict `:` type($mask)"; } - -def NVVM_ElectSyncOp : NVVM_Op<"elect.sync", - [DeclareOpInterfaceMethods]> +def NVVM_ElectSyncOp : NVVM_Op<"elect.sync"> { + let summary = "Elect one leader thread"; + let description = [{ + The `elect.sync` instruction elects one predicated active leader + thread from among a set of threads specified in membermask. + The membermask is set to `0xFFFFFFFF` for the current version + of this Op. The predicate result is set to `True` for the + leader thread, and `False` for all other threads. 
+ + [For more information, see PTX ISA] + (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-elect-sync) + }]; + let results = (outs I1:$pred); let assemblyFormat = "attr-dict `->` type(results)"; - let extraClassDefinition = [{ - std::string $cppClass::getPtx() { - return std::string( - "{ \n" - ".reg .u32 rx; \n" - ".reg .pred px; \n" - " mov.pred %0, 0; \n" - " elect.sync rx | px, 0xFFFFFFFF;\n" - "@px mov.pred %0, 1; \n" - "}\n" - ); - } + string llvmBuilder = [{ + auto *resultTuple = createIntrinsicCall(builder, + llvm::Intrinsic::nvvm_elect_sync, {builder.getInt32(0xFFFFFFFF)}); + // Extract the second value into $pred + $pred = builder.CreateExtractValue(resultTuple, 1); }]; } diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index 375e2951a037cd9..66b736c18718f3b 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -579,13 +579,7 @@ func.func @wgmma_f32_e5m2_e4m3(%descA : i64, %descB : i64) -> !mat32f32 { // ----- func.func @elect_one_leader_sync() { - // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "{ - // CHECK-SAME: .reg .u32 rx; - // CHECK-SAME: .reg .pred px; - // CHECK-SAME: mov.pred $0, 0; - // CHECK-SAME: elect.sync rx | px, 0xFFFFFFFF; - // CHECK-SAME: @px mov.pred $0, 1; - // CHECK-SAME: "=b" : () -> i1 + // CHECK: %[[RES:.*]] = nvvm.elect.sync -> i1 %cnd = nvvm.elect.sync -> i1 return } diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index 0471e5faf845783..75ce958b43fd346 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -259,6 +259,15 @@ llvm.func @nvvm_vote(%0 : i32, %1 : i1) -> i32 { llvm.return %3 : i32 } +// CHECK-LABEL: @nvvm_elect_sync +llvm.func @nvvm_elect_sync() -> i1 { + // CHECK: %[[RES:.*]] = call { i32, i1 } @llvm.nvvm.elect.sync(i32 
-1) + // CHECK-NEXT: %[[PRED:.*]] = extractvalue { i32, i1 } %[[RES]], 1 + // CHECK-NEXT: ret i1 %[[PRED]] + %0 = nvvm.elect.sync -> i1 + llvm.return %0 : i1 +} + // CHECK-LABEL: @nvvm_mma_mn8n8k4_row_col_f32_f32 llvm.func @nvvm_mma_mn8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>, %b0 : vector<2xf16>, %b1 : vector<2xf16>, From eef3766ae5a39fea6f7f81ac444f878969743d85 Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Sun, 27 Oct 2024 18:58:47 +0000 Subject: [PATCH 119/425] Assumed-size arrays are shared and cannot be privatized (#112963) Do not error out if default(none) is specified and the region has an assumed-size array. Fixes #110442 --- flang/lib/Semantics/resolve-directives.cpp | 2 ++ flang/test/Semantics/OpenMP/default-none.f90 | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 513e42bee976a9a..979570a7d4103a5 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2052,6 +2052,8 @@ void OmpAttributeVisitor::Post(const parser::OpenMPAllocatorsConstruct &x) { static bool IsPrivatizable(const Symbol *sym) { auto *misc{sym->detailsIf()}; return !IsProcedure(*sym) && !IsNamedConstant(*sym) && + !semantics::IsAssumedSizeArray( + *sym) && /* OpenMP 5.2, 5.1.1: Assumed-size arrays are shared*/ !sym->owner().IsDerivedType() && sym->owner().kind() != Scope::Kind::ImpliedDos && !sym->detailsIf() && diff --git a/flang/test/Semantics/OpenMP/default-none.f90 b/flang/test/Semantics/OpenMP/default-none.f90 index 11ba878ea779403..761c2385466a088 100644 --- a/flang/test/Semantics/OpenMP/default-none.f90 +++ b/flang/test/Semantics/OpenMP/default-none.f90 @@ -47,3 +47,14 @@ subroutine sb4 end do loop !$omp end parallel end subroutine + +! 
Test that default(none) does not error for assumed-size array +subroutine sub( aaa) + real,dimension(*),intent(in)::aaa + integer::ip + real::ccc +!$omp parallel do private(ip,ccc) default(none) + do ip = 1, 10 + ccc= aaa(ip) + end do +end subroutine sub From 5621929f7f4878c318deecb592cd03f5ecbb83ba Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Sun, 27 Oct 2024 20:16:24 +0000 Subject: [PATCH 120/425] [Flang][OpenMP] Add parser support for grainsize and num_tasks clause (#113136) These clauses are applicable only for the taskloop directive. Since the directive has a TODO error, skipping the addition of TODOs for these clauses. --- flang/examples/FeatureList/FeatureList.cpp | 4 ++ flang/include/flang/Parser/dump-parse-tree.h | 4 ++ flang/include/flang/Parser/parse-tree.h | 14 +++++++ flang/lib/Lower/OpenMP/Clauses.cpp | 35 +++++++++++++---- flang/lib/Parser/openmp-parsers.cpp | 14 ++++++- flang/lib/Parser/unparse.cpp | 13 +++++++ flang/lib/Semantics/check-omp-structure.cpp | 4 +- flang/test/Parser/OpenMP/taskloop.f90 | 41 ++++++++++++++++++++ llvm/include/llvm/Frontend/OpenMP/OMP.td | 8 ++-- 9 files changed, 122 insertions(+), 15 deletions(-) create mode 100644 flang/test/Parser/OpenMP/taskloop.f90 diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index 06ca12a492d29b0..9fce67e61ed30fa 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -483,6 +483,8 @@ struct NodeVisitor { READ_FEATURE(OmpEndCriticalDirective) READ_FEATURE(OmpEndLoopDirective) READ_FEATURE(OmpEndSectionsDirective) + READ_FEATURE(OmpGrainsizeClause) + READ_FEATURE(OmpGrainsizeClause::Prescriptiveness) READ_FEATURE(OmpIfClause) READ_FEATURE(OmpIfClause::DirectiveNameModifier) READ_FEATURE(OmpLinearClause) @@ -494,6 +496,8 @@ struct NodeVisitor { READ_FEATURE(OmpMapClause) READ_FEATURE(OmpMapClause::TypeModifier) READ_FEATURE(OmpMapClause::Type) + READ_FEATURE(OmpNumTasksClause) + 
READ_FEATURE(OmpNumTasksClause::Prescriptiveness) READ_FEATURE(OmpObject) READ_FEATURE(OmpObjectList) READ_FEATURE(OmpOrderClause) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 76d2f164fc4bf00..ccdfe980f6f38c2 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -547,6 +547,10 @@ class ParseTreeDumper { NODE_ENUM(OmpOrderClause, Type) NODE(parser, OmpOrderModifier) NODE_ENUM(OmpOrderModifier, Kind) + NODE(parser, OmpGrainsizeClause) + NODE_ENUM(OmpGrainsizeClause, Prescriptiveness) + NODE(parser, OmpNumTasksClause) + NODE_ENUM(OmpNumTasksClause, Prescriptiveness) NODE(parser, OmpProcBindClause) NODE_ENUM(OmpProcBindClause, Type) NODE_ENUM(OmpReductionClause, ReductionModifier) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index c1884f6e88d1ec8..2a312e29a3a44d1 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3559,6 +3559,13 @@ struct OmpDependClause { std::variant u; }; +// OMP 5.2 12.6.1 grainsize-clause -> grainsize ([prescriptiveness :] value) +struct OmpGrainsizeClause { + TUPLE_CLASS_BOILERPLATE(OmpGrainsizeClause); + ENUM_CLASS(Prescriptiveness, Strict); + std::tuple, ScalarIntExpr> t; +}; + // 2.12 if-clause -> IF ([ directive-name-modifier :] scalar-logical-expr) struct OmpIfClause { TUPLE_CLASS_BOILERPLATE(OmpIfClause); @@ -3688,6 +3695,13 @@ struct OmpScheduleClause { t; }; +// OMP 5.2 12.6.2 num_tasks-clause -> num_tasks ([prescriptiveness :] value) +struct OmpNumTasksClause { + TUPLE_CLASS_BOILERPLATE(OmpNumTasksClause); + ENUM_CLASS(Prescriptiveness, Strict); + std::tuple, ScalarIntExpr> t; +}; + // OpenMP Clauses struct OmpClause { UNION_CLASS_BOILERPLATE(OmpClause); diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index ee3d74a7c631af3..3bd89b543288634 100644 --- 
a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -721,10 +721,20 @@ From make(const parser::OmpClause::From &inp, // Full: empty Grainsize make(const parser::OmpClause::Grainsize &inp, - semantics::SemanticsContext &semaCtx) { - // inp.v -> parser::ScalarIntExpr - return Grainsize{{/*Prescriptiveness=*/std::nullopt, - /*GrainSize=*/makeExpr(inp.v, semaCtx)}}; + semantics::SemanticsContext &semaCtx) { + // inp.v -> parser::OmpGrainsizeClause + using wrapped = parser::OmpGrainsizeClause; + + CLAUSET_ENUM_CONVERT( // + convert, parser::OmpGrainsizeClause::Prescriptiveness, Grainsize::Prescriptiveness, + // clang-format off + MS(Strict, Strict) + // clang-format on + ); + auto &t0 = std::get>(inp.v.t); + auto &t1 = std::get(inp.v.t); + return Grainsize{{/*Prescriptiveness=*/maybeApply(convert, t0), + /*Grainsize=*/makeExpr(t1, semaCtx)}}; } HasDeviceAddr make(const parser::OmpClause::HasDeviceAddr &inp, @@ -971,9 +981,20 @@ Novariants make(const parser::OmpClause::Novariants &inp, NumTasks make(const parser::OmpClause::NumTasks &inp, semantics::SemanticsContext &semaCtx) { - // inp.v -> parser::ScalarIntExpr - return NumTasks{{/*Prescriptiveness=*/std::nullopt, - /*NumTasks=*/makeExpr(inp.v, semaCtx)}}; + // inp.v -> parser::OmpNumTasksClause + using wrapped = parser::OmpNumTasksClause; + + CLAUSET_ENUM_CONVERT( // + convert, parser::OmpNumTasksClause::Prescriptiveness, + NumTasks::Prescriptiveness, + // clang-format off + MS(Strict, Strict) + // clang-format on + ); + auto &t0 = std::get>(inp.v.t); + auto &t1 = std::get(inp.v.t); + return NumTasks{{/*Prescriptiveness=*/maybeApply(convert, t0), + /*NumTasks=*/makeExpr(t1, semaCtx)}}; } NumTeams make(const parser::OmpClause::NumTeams &inp, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index e740c421ca80276..ae0c351fed56d1d 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -408,6 +408,16 @@ 
TYPE_PARSER(construct( maybe(Parser{} / ":"), "CONCURRENT" >> pure(OmpOrderClause::Type::Concurrent))) +// OMP 5.2 12.6.1 grainsize([ prescriptiveness :] scalar-integer-expression) +TYPE_PARSER(construct( + maybe("STRICT" >> pure(OmpGrainsizeClause::Prescriptiveness::Strict) / ":"), + scalarIntExpr)) + +// OMP 5.2 12.6.2 num_tasks([ prescriptiveness :] scalar-integer-expression) +TYPE_PARSER(construct( + maybe("STRICT" >> pure(OmpNumTasksClause::Prescriptiveness::Strict) / ":"), + scalarIntExpr)) + TYPE_PARSER( construct(designator) || construct("/" >> name / "/")) @@ -464,7 +474,7 @@ TYPE_PARSER( "FROM" >> construct(construct( parenthesized(Parser{}))) || "GRAINSIZE" >> construct(construct( - parenthesized(scalarIntExpr))) || + parenthesized(Parser{}))) || "HAS_DEVICE_ADDR" >> construct(construct( parenthesized(Parser{}))) || @@ -491,7 +501,7 @@ TYPE_PARSER( construct(construct()) || "NOWAIT" >> construct(construct()) || "NUM_TASKS" >> construct(construct( - parenthesized(scalarIntExpr))) || + parenthesized(Parser{}))) || "NUM_TEAMS" >> construct(construct( parenthesized(scalarIntExpr))) || "NUM_THREADS" >> construct(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 19ceb2a3ebc3178..ba4155469073e6a 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2196,6 +2196,16 @@ class UnparseVisitor { Walk(std::get>(x.t), ":"); Walk(std::get(x.t)); } + void Unparse(const OmpGrainsizeClause &x) { + Walk(std::get>(x.t), + ":"); + Walk(std::get(x.t)); + } + void Unparse(const OmpNumTasksClause &x) { + Walk( + std::get>(x.t), ":"); + Walk(std::get(x.t)); + } void Unparse(const OmpDependSinkVecLength &x) { Walk(std::get(x.t)); Walk(std::get(x.t)); @@ -2829,6 +2839,9 @@ class UnparseVisitor { WALK_NESTED_ENUM(OmpCancelType, Type) // OMP cancel-type WALK_NESTED_ENUM(OmpOrderClause, Type) // OMP order-type WALK_NESTED_ENUM(OmpOrderModifier, Kind) // OMP order-modifier + WALK_NESTED_ENUM( + OmpGrainsizeClause, 
Prescriptiveness) // OMP grainsize-modifier + WALK_NESTED_ENUM(OmpNumTasksClause, Prescriptiveness) // OMP numtasks-modifier WALK_NESTED_ENUM(OmpMapClause, Type) // OMP map-type WALK_NESTED_ENUM(OmpMapClause, TypeModifier) // OMP map-type-modifier #undef WALK_NESTED_ENUM diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 1c2cf304d0ee95f..455322d610d6c28 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2479,12 +2479,14 @@ CHECK_SIMPLE_CLAUSE(Final, OMPC_final) CHECK_SIMPLE_CLAUSE(Flush, OMPC_flush) CHECK_SIMPLE_CLAUSE(From, OMPC_from) CHECK_SIMPLE_CLAUSE(Full, OMPC_full) +CHECK_SIMPLE_CLAUSE(Grainsize, OMPC_grainsize) CHECK_SIMPLE_CLAUSE(Hint, OMPC_hint) CHECK_SIMPLE_CLAUSE(Holds, OMPC_holds) CHECK_SIMPLE_CLAUSE(InReduction, OMPC_in_reduction) CHECK_SIMPLE_CLAUSE(Inclusive, OMPC_inclusive) CHECK_SIMPLE_CLAUSE(Match, OMPC_match) CHECK_SIMPLE_CLAUSE(Nontemporal, OMPC_nontemporal) +CHECK_SIMPLE_CLAUSE(NumTasks, OMPC_num_tasks) CHECK_SIMPLE_CLAUSE(Order, OMPC_order) CHECK_SIMPLE_CLAUSE(Read, OMPC_read) CHECK_SIMPLE_CLAUSE(Threadprivate, OMPC_threadprivate) @@ -2535,8 +2537,6 @@ CHECK_SIMPLE_CLAUSE(OmpxBare, OMPC_ompx_bare) CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail) CHECK_SIMPLE_CLAUSE(Weak, OMPC_weak) -CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) -CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) CHECK_REQ_SCALAR_INT_CLAUSE(NumTeams, OMPC_num_teams) CHECK_REQ_SCALAR_INT_CLAUSE(NumThreads, OMPC_num_threads) CHECK_REQ_SCALAR_INT_CLAUSE(OmpxDynCgroupMem, OMPC_ompx_dyn_cgroup_mem) diff --git a/flang/test/Parser/OpenMP/taskloop.f90 b/flang/test/Parser/OpenMP/taskloop.f90 new file mode 100644 index 000000000000000..a9c361046bd5f5d --- /dev/null +++ b/flang/test/Parser/OpenMP/taskloop.f90 @@ -0,0 +1,41 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s +! 
RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine parallel_work + integer :: i + +!CHECK: !$OMP TASKLOOP GRAINSIZE(STRICT:500_4) +!PARSE-TREE: OmpBeginLoopDirective +!PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = taskloop +!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Grainsize -> OmpGrainsizeClause +!PARSE-TREE-NEXT: Prescriptiveness = Strict +!PARSE-TREE-NEXT: Scalar -> Integer -> Expr = '500_4' + !$omp taskloop grainsize(strict: 500) + do i=1,10000 + call loop_body(i) + end do + !$omp end taskloop + +!CHECK: !$OMP TASKLOOP GRAINSIZE(500_4) +!PARSE-TREE: OmpBeginLoopDirective +!PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = taskloop +!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Grainsize -> OmpGrainsizeClause +!PARSE-TREE-NEXT: Scalar -> Integer -> Expr = '500_4' + !$omp taskloop grainsize(500) + do i=1,10000 + call loop_body(i) + end do + !$omp end taskloop + +!CHECK: !$OMP TASKLOOP NUM_TASKS(STRICT:500_4) +!PARSE-TREE: OmpBeginLoopDirective +!PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = taskloop +!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> NumTasks -> OmpNumTasksClause +!PARSE-TREE-NEXT: Prescriptiveness = Strict +!PARSE-TREE-NEXT: Scalar -> Integer -> Expr = '500_4' + !$omp taskloop num_tasks(strict: 500) + do i=1,10000 + call loop_body(i) + end do + !$omp end taskloop +end subroutine parallel_work diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index d592f369a17f92c..70179bab4757790 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -185,10 +185,10 @@ def OMPC_Full: Clause<"full"> { let clangClass = "OMPFullClause"; } def OMP_GRAINSIZE_Strict : ClauseVal<"strict", 1, 1> {} -def OMP_GRAINSIZE_Unknown : ClauseVal<"unkonwn", 2, 0> { let isDefault = 1; } +def OMP_GRAINSIZE_Unknown : ClauseVal<"unknown", 2, 0> { let isDefault = 1; } def OMPC_GrainSize : 
Clause<"grainsize"> { let clangClass = "OMPGrainsizeClause"; - let flangClass = "ScalarIntExpr"; + let flangClass = "OmpGrainsizeClause"; let enumClauseValue = "GrainsizeType"; let allowedClauseValues = [ OMP_GRAINSIZE_Strict, @@ -301,10 +301,10 @@ def OMPC_NoWait : Clause<"nowait"> { let clangClass = "OMPNowaitClause"; } def OMP_NUMTASKS_Strict : ClauseVal<"strict", 1, 1> {} -def OMP_NUMTASKS_Unknown : ClauseVal<"unkonwn", 2, 0> { let isDefault = 1; } +def OMP_NUMTASKS_Unknown : ClauseVal<"unknown", 2, 0> { let isDefault = 1; } def OMPC_NumTasks : Clause<"num_tasks"> { let clangClass = "OMPNumTasksClause"; - let flangClass = "ScalarIntExpr"; + let flangClass = "OmpNumTasksClause"; let enumClauseValue = "NumTasksType"; let allowedClauseValues = [ OMP_NUMTASKS_Strict, From 7b3da7b3b2b0e2f322dddf1f343571cc7fd09b09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sun, 27 Oct 2024 23:14:07 +0100 Subject: [PATCH 121/425] [GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE (#110561) Credits: https://github.com/llvm/llvm-project/pull/72976 LLVM ERROR: cannot select: %3:zpr() = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64) ;; mul define void @xmulnxv2i64( %a, %b, ptr %p) { entry: %c = mul %a, %b store %c, ptr %p, align 16 ret void } define void @mulnxv4i32( %a, %b, ptr %p) { entry: %c = mul %a, %b store %c, ptr %p, align 16 ret void } define void @mulnxv8i16( %a, %b, ptr %p) { entry: %c = mul %a, %b store %c, ptr %p, align 16 ret void } define void @mulnxv16i8( %a, %b, ptr %p) { entry: %c = mul %a, %b store %c, ptr %p, align 16 ret void } --- .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 12 +- .../AArch64/AArch64GenRegisterBankInfo.def | 2 + .../AArch64/GISel/AArch64CallLowering.cpp | 4 +- .../GISel/AArch64InstructionSelector.cpp | 10 +- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 30 ++- .../GlobalISel/legalizer-info-validation.mir | 1 - .../CodeGen/AArch64/GlobalISel/sve-integer.ll | 208 ++++++++++++++++++ 7 files changed, 
257 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index bcd44abb2088a04..6d71c150c8da6b8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -998,8 +998,7 @@ class LegalizeRuleSet { LegalizeAction::WidenScalar, [=](const LegalityQuery &Query) { const LLT VecTy = Query.Types[TypeIdx]; - return VecTy.isVector() && !VecTy.isScalable() && - VecTy.getSizeInBits() < VectorSize; + return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize; }, [=](const LegalityQuery &Query) { const LLT VecTy = Query.Types[TypeIdx]; @@ -1172,7 +1171,7 @@ class LegalizeRuleSet { LegalizeAction::MoreElements, [=](const LegalityQuery &Query) { LLT VecTy = Query.Types[TypeIdx]; - return VecTy.isVector() && VecTy.getElementType() == EltTy && + return VecTy.isFixedVector() && VecTy.getElementType() == EltTy && VecTy.getNumElements() < MinElements; }, [=](const LegalityQuery &Query) { @@ -1190,7 +1189,7 @@ class LegalizeRuleSet { LegalizeAction::MoreElements, [=](const LegalityQuery &Query) { LLT VecTy = Query.Types[TypeIdx]; - return VecTy.isVector() && VecTy.getElementType() == EltTy && + return VecTy.isFixedVector() && VecTy.getElementType() == EltTy && (VecTy.getNumElements() % NumElts != 0); }, [=](const LegalityQuery &Query) { @@ -1210,7 +1209,7 @@ class LegalizeRuleSet { LegalizeAction::FewerElements, [=](const LegalityQuery &Query) { LLT VecTy = Query.Types[TypeIdx]; - return VecTy.isVector() && VecTy.getElementType() == EltTy && + return VecTy.isFixedVector() && VecTy.getElementType() == EltTy && VecTy.getNumElements() > MaxElements; }, [=](const LegalityQuery &Query) { @@ -1231,6 +1230,9 @@ class LegalizeRuleSet { assert(MinTy.getElementType() == MaxTy.getElementType() && "Expected element types to agree"); + 
assert((!MinTy.isScalableVector() && !MaxTy.isScalableVector()) && + "Unexpected scalable vectors"); + const LLT EltTy = MinTy.getElementType(); return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements()) .clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements()); diff --git a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def index 82066b48c84b406..8ff59f60968bebf 100644 --- a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def +++ b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def @@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx, const unsigned MinSize = Size.getKnownMinValue(); assert((!Size.isScalable() || MinSize >= 128) && "Scalable vector types should have size of at least 128 bits"); + if (Size.isScalable()) + return 3; if (MinSize <= 16) return 0; if (MinSize <= 32) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 5aee7804de3e3fd..6cbfb018b3183ad 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -393,8 +393,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, // i1 is a special case because SDAG i1 true is naturally zero extended // when widened using ANYEXT. We need to do it explicitly here. 
auto &Flags = CurArgInfo.Flags[0]; - if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() && - !Flags.isZExt()) { + if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) && + !Flags.isSExt() && !Flags.isZExt()) { CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0); } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) == 1) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index df0c09d32c074ac..afea08ab0925011 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, unsigned RegBankID = RB.getID(); if (RegBankID == AArch64::GPRRegBankID) { + assert(!SizeInBits.isScalable() && "Unexpected scalable register size"); if (SizeInBits <= 32) return GetAllRegSet ? &AArch64::GPR32allRegClass : &AArch64::GPR32RegClass; @@ -626,6 +627,12 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, } if (RegBankID == AArch64::FPRRegBankID) { + if (SizeInBits.isScalable()) { + assert(SizeInBits == TypeSize::getScalable(128) && + "Unexpected scalable register size"); + return &AArch64::ZPRRegClass; + } + switch (SizeInBits) { default: return nullptr; @@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, // then we can pull it into the helpers that get the appropriate class for a // register bank. Or make a new helper that carries along some constraint // information. 
- if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) + if (SrcRegBank != DstRegBank && + (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1))) SrcSize = DstSize = TypeSize::getFixed(32); return {getMinClassForRegBank(SrcRegBank, SrcSize, true), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5cd1fea75025cd1..dd65dbe594a6345 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const bool HasCSSC = ST.hasCSSC(); const bool HasRCPC3 = ST.hasRCPC3(); + const bool HasSVE = ST.hasSVE(); getActionDefinitionsBuilder( {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER}) @@ -127,7 +128,34 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v2s64, v2s64) .moreElementsToNextPow2(0); - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) + getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR}) + .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8}) + .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64}) + .widenScalarToNextPow2(0) + .clampScalar(0, s32, s64) + .clampMaxNumElements(0, s8, 16) + .clampMaxNumElements(0, s16, 8) + .clampNumElements(0, v2s32, v4s32) + .clampNumElements(0, v2s64, v2s64) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].getNumElements() <= 2; + }, + 0, s32) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].getNumElements() <= 4; + }, + 0, s16) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].getNumElements() <= 16; + }, + 0, s8) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) + .moreElementsToNextPow2(0); + + getActionDefinitionsBuilder(G_MUL) .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, 
v16s8, v8s8}) .widenScalarToNextPow2(0) .clampScalar(0, s32, s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 80b6e4f6d528a2a..0af60a503c5f1c6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -24,7 +24,6 @@ # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # # DEBUG-NEXT: G_MUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll new file mode 100644 index 000000000000000..bc51cf7bac23c95 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll @@ -0,0 +1,208 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 | FileCheck %s +; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s + +;; add +define @addnxv2i64( %a, %b) { +; CHECK-LABEL: addnxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = add %a, %b + ret %c +} + +define @addnxv4i32( %a, %b) { +; CHECK-LABEL: addnxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ret +entry: + %c = add %a, %b + ret %c +} + +define @addnxv8i16( %a, %b, ptr %p) { +; CHECK-LABEL: addnxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ret +entry: + %c = add %a, %b + ret %c +} + +define @addnxv16i8( %a, %b) { +; CHECK-LABEL: addnxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: ret 
+entry: + %c = add %a, %b + ret %c +} + +;; sub +define @subnxv2i64( %a, %b) { +; CHECK-LABEL: subnxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = sub %a, %b + ret %c +} + +define @subnxv4i32( %a, %b) { +; CHECK-LABEL: subnxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: ret +entry: + %c = sub %a, %b + ret %c +} + +define @subnxv8i16( %a, %b, ptr %p) { +; CHECK-LABEL: subnxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: ret +entry: + %c = sub %a, %b + ret %c +} + +define @subnxv16i8( %a, %b) { +; CHECK-LABEL: subnxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: ret +entry: + %c = sub %a, %b + ret %c +} + +;; and +define @andnxv2i64( %a, %b) { +; CHECK-LABEL: andnxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = and %a, %b + ret %c +} + +define @andnxv4i32( %a, %b) { +; CHECK-LABEL: andnxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = and %a, %b + ret %c +} + +define @andnxv8i16( %a, %b, ptr %p) { +; CHECK-LABEL: andnxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = and %a, %b + ret %c +} + +define @andnxv16i8( %a, %b) { +; CHECK-LABEL: andnxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = and %a, %b + ret %c +} + +;; or +define @ornxv2i64( %a, %b) { +; CHECK-LABEL: ornxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = or %a, %b + ret %c +} + +define @ornxv4i32( %a, %b) { +; CHECK-LABEL: ornxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = or %a, %b + ret %c +} + +define @ornxv8i16( %a, %b, ptr %p) { +; CHECK-LABEL: ornxv8i16: +; CHECK: // %bb.0: 
// %entry +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = or %a, %b + ret %c +} + +define @ornxv16i8( %a, %b) { +; CHECK-LABEL: ornxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = or %a, %b + ret %c +} + +;; xor +define @xornxv2i64( %a, %b) { +; CHECK-LABEL: xornxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = xor %a, %b + ret %c +} + +define @xornxv4i32( %a, %b) { +; CHECK-LABEL: xornxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = xor %a, %b + ret %c +} + +define @xornxv8i16( %a, %b, ptr %p) { +; CHECK-LABEL: xornxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = xor %a, %b + ret %c +} + +define @xornxv16i8( %a, %b) { +; CHECK-LABEL: xornxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = xor %a, %b + ret %c +} From fb33af08e4c105a05855f8beeb972d493410e72f Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Sun, 27 Oct 2024 16:14:13 -0700 Subject: [PATCH 122/425] [NVPTX] Remove nvvm.ldg.global.* intrinsics (#112834) Remove these intrinsics which can be better represented by load instructions with `!invariant.load` metadata: - llvm.nvvm.ldg.global.i - llvm.nvvm.ldg.global.f - llvm.nvvm.ldg.global.p --- clang/lib/CodeGen/CGBuiltin.cpp | 47 +++-- .../builtins-nvptx-native-half-type-native.c | 4 +- .../CodeGen/builtins-nvptx-native-half-type.c | 4 +- clang/test/CodeGen/builtins-nvptx.c | 72 +++---- llvm/docs/ReleaseNotes.md | 9 +- llvm/include/llvm/IR/IntrinsicsNVVM.td | 18 +- llvm/lib/IR/AutoUpgrade.cpp | 14 ++ llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 189 +++++++----------- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 55 +---- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 2 - .../Assembler/auto_upgrade_nvvm_intrinsics.ll | 31 +++ 11 files changed, 197 
insertions(+), 248 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e2d03eff8ab4a0f..911eec48bcb2fd4 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -20492,8 +20492,8 @@ static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { #undef MMA_VARIANTS_B1_XOR } -static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF, - const CallExpr *E) { +static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF, + const CallExpr *E) { Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); QualType ArgType = E->getArg(0)->getType(); clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType); @@ -20503,6 +20503,21 @@ static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF, {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())}); } +static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) { + Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); + QualType ArgType = E->getArg(0)->getType(); + clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType); + llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType()); + + // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL + auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1)); + auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign()); + MDNode *MD = MDNode::get(CGF.Builder.getContext(), {}); + LD->setMetadata(LLVMContext::MD_invariant_load, MD); + + return LD; +} + static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF, const CallExpr *E) { Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); @@ -20536,9 +20551,11 @@ static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID, return nullptr; } - if (IntrinsicID == Intrinsic::nvvm_ldg_global_f || - IntrinsicID == Intrinsic::nvvm_ldu_global_f) - return MakeLdgLdu(IntrinsicID, CGF, E); + if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2) + 
return MakeLdg(CGF, E); + + if (IntrinsicID == Intrinsic::nvvm_ldu_global_f) + return MakeLdu(IntrinsicID, CGF, E); SmallVector Args; auto *F = CGF.CGM.getIntrinsic(IntrinsicID); @@ -20675,16 +20692,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_ldg_ul2: case NVPTX::BI__nvvm_ldg_ull: case NVPTX::BI__nvvm_ldg_ull2: - // PTX Interoperability section 2.2: "For a vector with an even number of - // elements, its alignment is set to number of elements times the alignment - // of its member: n*alignof(t)." - return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E); case NVPTX::BI__nvvm_ldg_f: case NVPTX::BI__nvvm_ldg_f2: case NVPTX::BI__nvvm_ldg_f4: case NVPTX::BI__nvvm_ldg_d: case NVPTX::BI__nvvm_ldg_d2: - return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E); + // PTX Interoperability section 2.2: "For a vector with an even number of + // elements, its alignment is set to number of elements times the alignment + // of its member: n*alignof(t)." 
+ return MakeLdg(*this, E); case NVPTX::BI__nvvm_ldu_c: case NVPTX::BI__nvvm_ldu_sc: @@ -20715,13 +20731,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, case NVPTX::BI__nvvm_ldu_ul2: case NVPTX::BI__nvvm_ldu_ull: case NVPTX::BI__nvvm_ldu_ull2: - return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E); + return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E); case NVPTX::BI__nvvm_ldu_f: case NVPTX::BI__nvvm_ldu_f2: case NVPTX::BI__nvvm_ldu_f4: case NVPTX::BI__nvvm_ldu_d: case NVPTX::BI__nvvm_ldu_d2: - return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E); + return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E); case NVPTX::BI__nvvm_atom_cta_add_gen_i: case NVPTX::BI__nvvm_atom_cta_add_gen_l: @@ -21195,14 +21211,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_ldg_h: - return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); case NVPTX::BI__nvvm_ldg_h2: - return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); + return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this); case NVPTX::BI__nvvm_ldu_h: + case NVPTX::BI__nvvm_ldu_h2: return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); - case NVPTX::BI__nvvm_ldu_h2: { - return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); - } case NVPTX::BI__nvvm_cp_async_ca_shared_global_4: return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4, Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E, diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type-native.c b/clang/test/CodeGen/builtins-nvptx-native-half-type-native.c index b594fc876d4b9eb..035c4c6066be247 100644 --- a/clang/test/CodeGen/builtins-nvptx-native-half-type-native.c +++ b/clang/test/CodeGen/builtins-nvptx-native-half-type-native.c @@ -52,8 +52,8 @@ typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2))); // CHECK: call 
<2 x half> @llvm.nvvm.fmax.ftz.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}}) // CHECK: call <2 x half> @llvm.nvvm.fmax.nan.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}}) // CHECK: call <2 x half> @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}}) -// CHECK: call half @llvm.nvvm.ldg.global.f.f16.p0(ptr {{.*}}, i32 2) -// CHECK: call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p0(ptr {{.*}}, i32 4) +// CHECK: load half, ptr addrspace(1) {{.*}}, align 2, !invariant.load +// CHECK: load <2 x half>, ptr addrspace(1) {{.*}}, align 4, !invariant.load // CHECK: call half @llvm.nvvm.ldu.global.f.f16.p0(ptr {{.*}}, i32 2) // CHECK: call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p0(ptr {{.*}}, i32 4) __device__ void nvvm_native_half_types(void *a, void*b, void*c, __fp16* out) { diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type.c b/clang/test/CodeGen/builtins-nvptx-native-half-type.c index 4aeae953bc1622c..511497702ff7f9e 100644 --- a/clang/test/CodeGen/builtins-nvptx-native-half-type.c +++ b/clang/test/CodeGen/builtins-nvptx-native-half-type.c @@ -177,9 +177,9 @@ typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2))); // CHECK-LABEL: nvvm_ldg_native_half_types __device__ void nvvm_ldg_native_half_types(const void *p) { - // CHECK: call half @llvm.nvvm.ldg.global.f.f16.p0 + // CHECK: load half, ptr addrspace(1) {{.*}}, align 2, !invariant.load __nvvm_ldg_h((const __fp16 *)p); - // CHECK: call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p0 + // CHECK: load <2 x half>, ptr addrspace(1) {{.*}}, align 4, !invariant.load __nvvm_ldg_h2((const __fp16v2 *)p); } diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index 0d0e3ecdb90c9e4..3406cbdde2bf880 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -598,33 +598,33 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, double df, // CHECK-LABEL: nvvm_ldg __device__ void nvvm_ldg(const void 
*p) { - // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) - // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) - // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1) + // CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load + // CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load + // CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load __nvvm_ldg_c((const char *)p); __nvvm_ldg_uc((const unsigned char *)p); __nvvm_ldg_sc((const signed char *)p); - // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2) - // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2) + // CHECK: load i16, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load + // CHECK: load i16, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load __nvvm_ldg_s((const short *)p); __nvvm_ldg_us((const unsigned short *)p); - // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4) - // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4) + // CHECK: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load + // CHECK: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load __nvvm_ldg_i((const int *)p); __nvvm_ldg_ui((const unsigned int *)p); - // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4) - // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4) - // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8) - // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8) + // LP32: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load + // LP32: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load + // LP64: load i64, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load + // LP64: load i64, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load __nvvm_ldg_l((const long *)p); __nvvm_ldg_ul((const unsigned long *)p); 
- // CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0(ptr {{%[0-9]+}}, i32 4) + // CHECK: load float, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load __nvvm_ldg_f((const float *)p); - // CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0(ptr {{%[0-9]+}}, i32 8) + // CHECK: load double, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load __nvvm_ldg_d((const double *)p); // In practice, the pointers we pass to __ldg will be aligned as appropriate @@ -636,9 +636,9 @@ __device__ void nvvm_ldg(const void *p) { // elements, its alignment is set to number of elements times the alignment of // its member: n*alignof(t)." - // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) - // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) - // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2) + // CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load + // CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load + // CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load typedef char char2 __attribute__((ext_vector_type(2))); typedef unsigned char uchar2 __attribute__((ext_vector_type(2))); typedef signed char schar2 __attribute__((ext_vector_type(2))); @@ -646,9 +646,9 @@ __device__ void nvvm_ldg(const void *p) { __nvvm_ldg_uc2((const uchar2 *)p); __nvvm_ldg_sc2((const schar2 *)p); - // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) - // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) - // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4) + // CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load + // CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load + // CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load typedef char char4 __attribute__((ext_vector_type(4))); typedef 
unsigned char uchar4 __attribute__((ext_vector_type(4))); typedef signed char schar4 __attribute__((ext_vector_type(4))); @@ -656,59 +656,59 @@ __device__ void nvvm_ldg(const void *p) { __nvvm_ldg_uc4((const uchar4 *)p); __nvvm_ldg_sc4((const schar4 *)p); - // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4) - // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4) + // CHECK: load <2 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load + // CHECK: load <2 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load typedef short short2 __attribute__((ext_vector_type(2))); typedef unsigned short ushort2 __attribute__((ext_vector_type(2))); __nvvm_ldg_s2((const short2 *)p); __nvvm_ldg_us2((const ushort2 *)p); - // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0(ptr {{%[0-9]+}}, i32 8) - // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0(ptr {{%[0-9]+}}, i32 8) + // CHECK: load <4 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load + // CHECK: load <4 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load typedef short short4 __attribute__((ext_vector_type(4))); typedef unsigned short ushort4 __attribute__((ext_vector_type(4))); __nvvm_ldg_s4((const short4 *)p); __nvvm_ldg_us4((const ushort4 *)p); - // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8) - // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8) + // CHECK: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load + // CHECK: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load typedef int int2 __attribute__((ext_vector_type(2))); typedef unsigned int uint2 __attribute__((ext_vector_type(2))); __nvvm_ldg_i2((const int2 *)p); __nvvm_ldg_ui2((const uint2 *)p); - // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0(ptr {{%[0-9]+}}, i32 16) - // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0(ptr {{%[0-9]+}}, i32 
16) + // CHECK: load <4 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load + // CHECK: load <4 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load typedef int int4 __attribute__((ext_vector_type(4))); typedef unsigned int uint4 __attribute__((ext_vector_type(4))); __nvvm_ldg_i4((const int4 *)p); __nvvm_ldg_ui4((const uint4 *)p); - // LP32: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8) - // LP32: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8) - // LP64: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) - // LP64: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) + // LP32: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load + // LP32: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load + // LP64: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load + // LP64: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load typedef long long2 __attribute__((ext_vector_type(2))); typedef unsigned long ulong2 __attribute__((ext_vector_type(2))); __nvvm_ldg_l2((const long2 *)p); __nvvm_ldg_ul2((const ulong2 *)p); - // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) - // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16) + // CHECK: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load + // CHECK: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load typedef long long longlong2 __attribute__((ext_vector_type(2))); typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2))); __nvvm_ldg_ll2((const longlong2 *)p); __nvvm_ldg_ull2((const ulonglong2 *)p); - // CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0(ptr {{%[0-9]+}}, i32 8) + // CHECK: load <2 x float>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load typedef float float2 __attribute__((ext_vector_type(2))); 
__nvvm_ldg_f2((const float2 *)p); - // CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0(ptr {{%[0-9]+}}, i32 16) + // CHECK: load <4 x float>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load typedef float float4 __attribute__((ext_vector_type(4))); __nvvm_ldg_f4((const float4 *)p); - // CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0(ptr {{%[0-9]+}}, i32 16) + // CHECK: load <2 x double>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load typedef double double2 __attribute__((ext_vector_type(2))); __nvvm_ldg_d2((const double2 *)p); } diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index e3d93f0dfd0ec55..3cac3e57344dc32 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -88,7 +88,14 @@ Changes to the LLVM IR * `llvm.nvvm.ptr.shared.to.gen` * `llvm.nvvm.ptr.constant.to.gen` * `llvm.nvvm.ptr.local.to.gen` - + +* Remove the following intrinsics which can be replaced with a load from + addrspace(1) with an !invariant.load metadata + + * `llvm.nvvm.ldg.global.i` + * `llvm.nvvm.ldg.global.f` + * `llvm.nvvm.ldg.global.p` + * Operand bundle values can now be metadata strings. Changes to LLVM infrastructure diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index b4a06f583f2c911..5164f873d00f482 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -42,6 +42,9 @@ // * llvm.nvvm.ptr.shared.to.gen --> ibid. // * llvm.nvvm.ptr.constant.to.gen --> ibid. // * llvm.nvvm.ptr.local.to.gen --> ibid. +// * llvm.nvvm.ldg.global.i --> load addrspace(1) !invariant.load +// * llvm.nvvm.ldg.global.f --> ibid. +// * llvm.nvvm.ldg.global.p --> ibid.
def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr @@ -1605,21 +1608,6 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], "llvm.nvvm.ldu.global.p">; -// Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the -// pointer's alignment. -def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty], - [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], - "llvm.nvvm.ldg.global.i">; -def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty], - [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], - "llvm.nvvm.ldg.global.f">; -def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], - [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture>], - "llvm.nvvm.ldg.global.p">; - // Used in nvvm internally to help address space opt and ptx code generation // This is for params that are passed to kernel functions by pointer by-val. 
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty], diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index bb03c9290e4cf41..73882fbc7a251ab 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -37,6 +37,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/AMDGPUAddrSpace.h" #include "llvm/Support/CommandLine.h" @@ -1301,6 +1302,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, (Name.consume_front("local") || Name.consume_front("shared") || Name.consume_front("global") || Name.consume_front("constant")) && Name.starts_with(".to.gen"); + else if (Name.consume_front("ldg.global.")) + // nvvm.ldg.global.{i,p,f} + Expand = (Name.starts_with("i.") || Name.starts_with("f.") || + Name.starts_with("p.")); else Expand = false; @@ -2363,6 +2368,15 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Name.consume_front("constant")) && Name.starts_with(".to.gen"))) { Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType()); + } else if (Name.consume_front("ldg.global")) { + Value *Ptr = CI->getArgOperand(0); + Align PtrAlign = cast(CI->getArgOperand(1))->getAlignValue(); + // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL + Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1)); + Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign); + MDNode *MD = MDNode::get(Builder.getContext(), {}); + LD->setMetadata(LLVMContext::MD_invariant_load, MD); + return LD; } else { Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name); if (IID != Intrinsic::not_intrinsic && diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 93c2d92ef7c1c84..965ed98630a28d9 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -126,8 
+126,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) { if (tryLoadVector(N)) return; break; - case NVPTXISD::LDGV2: - case NVPTXISD::LDGV4: case NVPTXISD::LDUV2: case NVPTXISD::LDUV4: if (tryLDGLDU(N)) @@ -550,9 +548,6 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { switch (IID) { default: return false; - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_p: case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_p: @@ -1559,34 +1554,11 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) { } bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { - SDValue Op1; - MemSDNode *Mem; - bool IsLDG = true; + auto *Mem = cast(N); // If this is an LDG intrinsic, the address is the third operand. If its an // LDG/LDU SD node (from custom vector handling), then its the second operand - if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { - Op1 = N->getOperand(2); - Mem = cast(N); - unsigned IID = N->getConstantOperandVal(1); - switch (IID) { - default: - return false; - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_p: - IsLDG = true; - break; - case Intrinsic::nvvm_ldu_global_f: - case Intrinsic::nvvm_ldu_global_i: - case Intrinsic::nvvm_ldu_global_p: - IsLDG = false; - break; - } - } else { - Op1 = N->getOperand(1); - Mem = cast(N); - } + SDValue Op1 = N->getOperand(N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 
2 : 1); EVT OrigType = N->getValueType(0); EVT EltVT = Mem->getMemoryVT(); @@ -1629,26 +1601,20 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { default: return false; case ISD::LOAD: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8avar, + NVPTX::INT_PTX_LDG_GLOBAL_i16avar, NVPTX::INT_PTX_LDG_GLOBAL_i32avar, + NVPTX::INT_PTX_LDG_GLOBAL_i64avar, NVPTX::INT_PTX_LDG_GLOBAL_f32avar, + NVPTX::INT_PTX_LDG_GLOBAL_f64avar); + break; case ISD::INTRINSIC_W_CHAIN: - if (IsLDG) - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8avar, - NVPTX::INT_PTX_LDG_GLOBAL_i16avar, - NVPTX::INT_PTX_LDG_GLOBAL_i32avar, - NVPTX::INT_PTX_LDG_GLOBAL_i64avar, - NVPTX::INT_PTX_LDG_GLOBAL_f32avar, - NVPTX::INT_PTX_LDG_GLOBAL_f64avar); - else - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_GLOBAL_i8avar, - NVPTX::INT_PTX_LDU_GLOBAL_i16avar, - NVPTX::INT_PTX_LDU_GLOBAL_i32avar, - NVPTX::INT_PTX_LDU_GLOBAL_i64avar, - NVPTX::INT_PTX_LDU_GLOBAL_f32avar, - NVPTX::INT_PTX_LDU_GLOBAL_f64avar); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8avar, + NVPTX::INT_PTX_LDU_GLOBAL_i16avar, NVPTX::INT_PTX_LDU_GLOBAL_i32avar, + NVPTX::INT_PTX_LDU_GLOBAL_i64avar, NVPTX::INT_PTX_LDU_GLOBAL_f32avar, + NVPTX::INT_PTX_LDU_GLOBAL_f64avar); break; case NVPTXISD::LoadV2: - case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar, @@ -1667,7 +1633,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar); break; case NVPTXISD::LoadV4: - case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar, @@ -1693,26 +1658,24 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { default: return false; case ISD::LOAD: + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + 
NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, + NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, + NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, + NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, + NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, + NVPTX::INT_PTX_LDG_GLOBAL_f64ari64); + break; case ISD::INTRINSIC_W_CHAIN: - if (IsLDG) - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, - NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, - NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, - NVPTX::INT_PTX_LDG_GLOBAL_f64ari64); - else - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, - NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, - NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, - NVPTX::INT_PTX_LDU_GLOBAL_f64ari64); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::INT_PTX_LDU_GLOBAL_i8ari64, + NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, + NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, + NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, + NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, + NVPTX::INT_PTX_LDU_GLOBAL_f64ari64); break; case NVPTXISD::LoadV2: - case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64, @@ -1731,7 +1694,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64); break; case NVPTXISD::LoadV4: - case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, @@ -1751,26 +1713,20 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { default: return false; case ISD::LOAD: + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8ari, + NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari, + NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari, + NVPTX::INT_PTX_LDG_GLOBAL_f64ari); + 
break; case ISD::INTRINSIC_W_CHAIN: - if (IsLDG) - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8ari, - NVPTX::INT_PTX_LDG_GLOBAL_i16ari, - NVPTX::INT_PTX_LDG_GLOBAL_i32ari, - NVPTX::INT_PTX_LDG_GLOBAL_i64ari, - NVPTX::INT_PTX_LDG_GLOBAL_f32ari, - NVPTX::INT_PTX_LDG_GLOBAL_f64ari); - else - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_GLOBAL_i8ari, - NVPTX::INT_PTX_LDU_GLOBAL_i16ari, - NVPTX::INT_PTX_LDU_GLOBAL_i32ari, - NVPTX::INT_PTX_LDU_GLOBAL_i64ari, - NVPTX::INT_PTX_LDU_GLOBAL_f32ari, - NVPTX::INT_PTX_LDU_GLOBAL_f64ari); + Opcode = pickOpcodeForVT( + EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8ari, + NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari, + NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari, + NVPTX::INT_PTX_LDU_GLOBAL_f64ari); break; case NVPTXISD::LoadV2: - case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, @@ -1789,7 +1745,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32); break; case NVPTXISD::LoadV4: - case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, @@ -1815,26 +1770,24 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { default: return false; case ISD::LOAD: + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::INT_PTX_LDG_GLOBAL_i8areg64, + NVPTX::INT_PTX_LDG_GLOBAL_i16areg64, + NVPTX::INT_PTX_LDG_GLOBAL_i32areg64, + NVPTX::INT_PTX_LDG_GLOBAL_i64areg64, + NVPTX::INT_PTX_LDG_GLOBAL_f32areg64, + NVPTX::INT_PTX_LDG_GLOBAL_f64areg64); + break; case ISD::INTRINSIC_W_CHAIN: - if (IsLDG) - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8areg64, - NVPTX::INT_PTX_LDG_GLOBAL_i16areg64, - NVPTX::INT_PTX_LDG_GLOBAL_i32areg64, - 
NVPTX::INT_PTX_LDG_GLOBAL_i64areg64, - NVPTX::INT_PTX_LDG_GLOBAL_f32areg64, - NVPTX::INT_PTX_LDG_GLOBAL_f64areg64); - else - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_GLOBAL_i8areg64, - NVPTX::INT_PTX_LDU_GLOBAL_i16areg64, - NVPTX::INT_PTX_LDU_GLOBAL_i32areg64, - NVPTX::INT_PTX_LDU_GLOBAL_i64areg64, - NVPTX::INT_PTX_LDU_GLOBAL_f32areg64, - NVPTX::INT_PTX_LDU_GLOBAL_f64areg64); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::INT_PTX_LDU_GLOBAL_i8areg64, + NVPTX::INT_PTX_LDU_GLOBAL_i16areg64, + NVPTX::INT_PTX_LDU_GLOBAL_i32areg64, + NVPTX::INT_PTX_LDU_GLOBAL_i64areg64, + NVPTX::INT_PTX_LDU_GLOBAL_f32areg64, + NVPTX::INT_PTX_LDU_GLOBAL_f64areg64); break; case NVPTXISD::LoadV2: - case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64, @@ -1853,7 +1806,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64); break; case NVPTXISD::LoadV4: - case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64, NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64, @@ -1873,26 +1825,24 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { default: return false; case ISD::LOAD: + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::INT_PTX_LDG_GLOBAL_i8areg, + NVPTX::INT_PTX_LDG_GLOBAL_i16areg, + NVPTX::INT_PTX_LDG_GLOBAL_i32areg, + NVPTX::INT_PTX_LDG_GLOBAL_i64areg, + NVPTX::INT_PTX_LDG_GLOBAL_f32areg, + NVPTX::INT_PTX_LDG_GLOBAL_f64areg); + break; case ISD::INTRINSIC_W_CHAIN: - if (IsLDG) - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8areg, - NVPTX::INT_PTX_LDG_GLOBAL_i16areg, - NVPTX::INT_PTX_LDG_GLOBAL_i32areg, - NVPTX::INT_PTX_LDG_GLOBAL_i64areg, - NVPTX::INT_PTX_LDG_GLOBAL_f32areg, - NVPTX::INT_PTX_LDG_GLOBAL_f64areg); - else - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - 
NVPTX::INT_PTX_LDU_GLOBAL_i8areg, - NVPTX::INT_PTX_LDU_GLOBAL_i16areg, - NVPTX::INT_PTX_LDU_GLOBAL_i32areg, - NVPTX::INT_PTX_LDU_GLOBAL_i64areg, - NVPTX::INT_PTX_LDU_GLOBAL_f32areg, - NVPTX::INT_PTX_LDU_GLOBAL_f64areg); + Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::INT_PTX_LDU_GLOBAL_i8areg, + NVPTX::INT_PTX_LDU_GLOBAL_i16areg, + NVPTX::INT_PTX_LDU_GLOBAL_i32areg, + NVPTX::INT_PTX_LDU_GLOBAL_i64areg, + NVPTX::INT_PTX_LDU_GLOBAL_f32areg, + NVPTX::INT_PTX_LDU_GLOBAL_f64areg); break; case NVPTXISD::LoadV2: - case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32, @@ -1911,7 +1861,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) { NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32); break; case NVPTXISD::LoadV4: - case NVPTXISD::LDGV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32, NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32, diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 57bc5fe0ac361c2..a95cba586b8fc39 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -949,8 +949,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(NVPTXISD::ProxyReg) MAKE_CASE(NVPTXISD::LoadV2) MAKE_CASE(NVPTXISD::LoadV4) - MAKE_CASE(NVPTXISD::LDGV2) - MAKE_CASE(NVPTXISD::LDGV4) MAKE_CASE(NVPTXISD::LDUV2) MAKE_CASE(NVPTXISD::LDUV4) MAKE_CASE(NVPTXISD::StoreV2) @@ -4774,26 +4772,6 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( return true; } - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_p: { - auto &DL = I.getDataLayout(); - - Info.opc = ISD::INTRINSIC_W_CHAIN; - if (Intrinsic == Intrinsic::nvvm_ldg_global_i) - Info.memVT = getValueType(DL, I.getType()); - else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) - Info.memVT = 
getPointerTy(DL); - else - Info.memVT = getValueType(DL, I.getType()); - Info.ptrVal = I.getArgOperand(0); - Info.offset = 0; - Info.flags = MachineMemOperand::MOLoad; - Info.align = cast(I.getArgOperand(1))->getMaybeAlignValue(); - - return true; - } - case Intrinsic::nvvm_tex_1d_v4f32_s32: case Intrinsic::nvvm_tex_1d_v4f32_f32: case Intrinsic::nvvm_tex_1d_level_v4f32_f32: @@ -6308,9 +6286,6 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, switch (IntrinNo) { default: return; - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_p: case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_p: { @@ -6339,37 +6314,11 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, default: return; case 2: - switch (IntrinNo) { - default: - return; - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_p: - Opcode = NVPTXISD::LDGV2; - break; - case Intrinsic::nvvm_ldu_global_i: - case Intrinsic::nvvm_ldu_global_f: - case Intrinsic::nvvm_ldu_global_p: - Opcode = NVPTXISD::LDUV2; - break; - } + Opcode = NVPTXISD::LDUV2; LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); break; case 4: { - switch (IntrinNo) { - default: - return; - case Intrinsic::nvvm_ldg_global_i: - case Intrinsic::nvvm_ldg_global_f: - case Intrinsic::nvvm_ldg_global_p: - Opcode = NVPTXISD::LDGV4; - break; - case Intrinsic::nvvm_ldu_global_i: - case Intrinsic::nvvm_ldu_global_f: - case Intrinsic::nvvm_ldu_global_p: - Opcode = NVPTXISD::LDUV4; - break; - } + Opcode = NVPTXISD::LDUV4; EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; LdResVTs = DAG.getVTList(ListVTs); break; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 8c3a597ce0b085b..824a659671967a0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -70,8 
+70,6 @@ enum NodeType : unsigned { LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, LoadV4, - LDGV2, // LDG.v2 - LDGV4, // LDG.v4 LDUV2, // LDU.v2 LDUV4, // LDU.v4 StoreV2, diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll index 584c0ef7cfeb785..5cc3a30277459b9 100644 --- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll +++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll @@ -44,6 +44,13 @@ declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3)) declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4)) declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5)) +declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1), i32) +declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1), i32) +declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1), i32) +declare i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr, i32) +declare ptr @llvm.nvvm.ldg.global.p.p0(ptr, i32) +declare float @llvm.nvvm.ldg.global.f.f32.p0(ptr, i32) + ; CHECK-LABEL: @simple_upgrade define void @simple_upgrade(i32 %a, i64 %b, i16 %c) { ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a) @@ -191,3 +198,27 @@ define void @addrspacecast(ptr %p0) { ret void } + +; CHECK-LABEL: @ldg +define void @ldg(ptr %p0, ptr addrspace(1) %p1) { +; CHECK: %1 = load i32, ptr addrspace(1) %p1, align 4, !invariant.load !0 +; CHECK: %2 = load ptr, ptr addrspace(1) %p1, align 8, !invariant.load !0 +; CHECK: %3 = load float, ptr addrspace(1) %p1, align 16, !invariant.load !0 + +; CHECK: %4 = addrspacecast ptr %p0 to ptr addrspace(1) +; CHECK: %5 = load i32, ptr addrspace(1) %4, align 4, !invariant.load !0 +; CHECK: %6 = addrspacecast ptr %p0 to ptr addrspace(1) +; CHECK: %7 = load ptr, ptr addrspace(1) %6, align 8, !invariant.load !0 +; CHECK: %8 = addrspacecast ptr %p0 to ptr addrspace(1) +; CHECK: %9 = load float, ptr addrspace(1) %8, align 16, !invariant.load !0 +; + %v1 = call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %p1, 
i32 4) + %v2 = call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %p1, i32 8 ) + %v3 = call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %p1, i32 16) + + %v4 = call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr %p0, i32 4) + %v5 = call ptr @llvm.nvvm.ldg.global.p.p0(ptr %p0, i32 8) + %v6 = call float @llvm.nvvm.ldg.global.f.f32.p0(ptr %p0, i32 16) + + ret void +} \ No newline at end of file From 1fe8e7838bb5118b9e48fa15fa21a4638bae8ae1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 27 Oct 2024 17:29:38 -0700 Subject: [PATCH 123/425] [X86] Rename the x86-asm-syntax variable. NFC Follow-up to #109360. x86-asm-syntax is for input instead of output. --- llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 47eb617c06ac5bb..39b0f7c4c4c1e63 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -23,9 +23,9 @@ enum AsmWriterFlavorTy { ATT = 0, Intel = 1 }; -static cl::opt AsmWriterFlavor( +static cl::opt X86AsmSyntax( "x86-asm-syntax", cl::init(ATT), cl::Hidden, - cl::desc("Choose style of code to emit from X86 backend:"), + cl::desc("Select the assembly style for input"), cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"), clEnumValN(Intel, "intel", "Emit Intel-style assembly"))); @@ -41,7 +41,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { if (is64Bit) CodePointerSize = CalleeSaveStackSlotSize = 8; - AssemblerDialect = AsmWriterFlavor; + AssemblerDialect = X86AsmSyntax; if (!is64Bit) Data64bitsDirective = nullptr; // we can't emit a 64-bit unit @@ -89,7 +89,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI. CalleeSaveStackSlotSize = is64Bit ? 
8 : 4; - AssemblerDialect = AsmWriterFlavor; + AssemblerDialect = X86AsmSyntax; // Debug Information SupportsDebugInformation = true; @@ -126,7 +126,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { ExceptionsType = ExceptionHandling::WinEH; - AssemblerDialect = AsmWriterFlavor; + AssemblerDialect = X86AsmSyntax; AllowAtInName = true; } @@ -159,7 +159,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { ExceptionsType = ExceptionHandling::DwarfCFI; } - AssemblerDialect = AsmWriterFlavor; + AssemblerDialect = X86AsmSyntax; AllowAtInName = true; } From 40fffba9b29141d598bf36cdc4bb886e97143d06 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Mon, 28 Oct 2024 09:54:29 +0800 Subject: [PATCH 124/425] [X86][AVX10.2] Fix wrong predicates for BF16 feature (#113800) Since AVX10.2, we need to enable 128/256-bit vector by default and check for 512 feature for 512-bit vector. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 41 ++++++++++------------ llvm/test/CodeGen/X86/avx10_2bf16-arith.ll | 22 ++++++++++++ 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a6d77873ec2901b..9d447959faf55a9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2406,7 +2406,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, addLegalFPImmediate(APFloat::getZero(APFloat::BFloat())); } - if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) { + if (!Subtarget.useSoftFloat() && Subtarget.hasBF16() && + Subtarget.useAVX512Regs()) { addRegisterClass(MVT::v32bf16, &X86::VR512RegClass); setF16Action(MVT::v32bf16, Expand); for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) @@ -2419,27 +2420,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) { - addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass); - 
addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass); - addRegisterClass(MVT::v32bf16, &X86::VR512RegClass); - - setOperationAction(ISD::FADD, MVT::v32bf16, Legal); - setOperationAction(ISD::FSUB, MVT::v32bf16, Legal); - setOperationAction(ISD::FMUL, MVT::v32bf16, Legal); - setOperationAction(ISD::FDIV, MVT::v32bf16, Legal); - setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal); - setOperationAction(ISD::FMA, MVT::v32bf16, Legal); - setOperationAction(ISD::SETCC, MVT::v32bf16, Custom); - if (Subtarget.hasVLX()) { - for (auto VT : {MVT::v8bf16, MVT::v16bf16}) { - setOperationAction(ISD::FADD, VT, Legal); - setOperationAction(ISD::FSUB, VT, Legal); - setOperationAction(ISD::FMUL, VT, Legal); - setOperationAction(ISD::FDIV, VT, Legal); - setOperationAction(ISD::FSQRT, VT, Legal); - setOperationAction(ISD::FMA, VT, Legal); - setOperationAction(ISD::SETCC, VT, Custom); - } + for (auto VT : {MVT::v8bf16, MVT::v16bf16}) { + setOperationAction(ISD::FADD, VT, Legal); + setOperationAction(ISD::FSUB, VT, Legal); + setOperationAction(ISD::FMUL, VT, Legal); + setOperationAction(ISD::FDIV, VT, Legal); + setOperationAction(ISD::FSQRT, VT, Legal); + setOperationAction(ISD::FMA, VT, Legal); + setOperationAction(ISD::SETCC, VT, Custom); + } + if (Subtarget.hasAVX10_2_512()) { + setOperationAction(ISD::FADD, MVT::v32bf16, Legal); + setOperationAction(ISD::FSUB, MVT::v32bf16, Legal); + setOperationAction(ISD::FMUL, MVT::v32bf16, Legal); + setOperationAction(ISD::FDIV, MVT::v32bf16, Legal); + setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal); + setOperationAction(ISD::FMA, MVT::v32bf16, Legal); + setOperationAction(ISD::SETCC, MVT::v32bf16, Custom); } } diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll index e0f5679e8ac96dd..c97d27ff324bbbf 100644 --- a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll +++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll @@ -1166,3 +1166,25 @@ entry: %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> 
zeroinitializer ret <8 x bfloat> %2 } + +define <32 x bfloat> @addv(<32 x bfloat> %a, <32 x bfloat> %b) nounwind { +; X64-LABEL: addv: +; X64: # %bb.0: +; X64-NEXT: vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2] +; X64-NEXT: vaddnepbf16 %ymm3, %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xcb] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: addv: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp # encoding: [0x55] +; X86-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-NEXT: andl $-32, %esp # encoding: [0x83,0xe4,0xe0] +; X86-NEXT: subl $32, %esp # encoding: [0x83,0xec,0x20] +; X86-NEXT: vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2] +; X86-NEXT: vaddnepbf16 8(%ebp), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0x8d,0x08,0x00,0x00,0x00] +; X86-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-NEXT: popl %ebp # encoding: [0x5d] +; X86-NEXT: retl # encoding: [0xc3] + %add = fadd <32 x bfloat> %a, %b + ret <32 x bfloat> %add +} From 7ad63c0e44ef277591497a176991e7723165611e Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Mon, 28 Oct 2024 09:54:51 +0800 Subject: [PATCH 125/425] [mlir][MathToFuncs] `MathToFuncs` only support integer type (#113693) This PR fixes a bug in `MathToFuncs` where it incorrectly converts index type for `math.ctlz` and `math.ipowi`, leading to a crash. Fixes #108150. --- .../Conversion/MathToFuncs/MathToFuncs.cpp | 21 +++++++++++++++---- mlir/test/Conversion/MathToFuncs/ctlz.mlir | 10 +++++++++ mlir/test/Conversion/MathToFuncs/ipowi.mlir | 11 ++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp b/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp index 3a567643ffdb8fc..df5396ac628cf67 100644 --- a/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp +++ b/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp @@ -781,6 +781,9 @@ struct ConvertMathToFuncsPass // or equal to minWidthOfFPowIExponent option value. 
bool isFPowIConvertible(math::FPowIOp op); + // Return true if the operation's result (element) type is an integer type. + bool isConvertible(Operation *op); + // Generate outlined implementations for power operations // and store them in funcImpls map. void generateOpImplementations(); @@ -798,13 +801,17 @@ bool ConvertMathToFuncsPass::isFPowIConvertible(math::FPowIOp op) { return (expTy && expTy.getWidth() >= minWidthOfFPowIExponent); } +bool ConvertMathToFuncsPass::isConvertible(Operation *op) { + return isa(getElementTypeOrSelf(op->getResult(0).getType())); +} + void ConvertMathToFuncsPass::generateOpImplementations() { ModuleOp module = getOperation(); module.walk([&](Operation *op) { TypeSwitch(op) .Case([&](math::CountLeadingZerosOp op) { - if (!convertCtlz) + if (!convertCtlz || !isConvertible(op)) return; Type resultType = getElementTypeOrSelf(op.getResult().getType()); @@ -816,6 +823,9 @@ void ConvertMathToFuncsPass::generateOpImplementations() { entry.first->second = createCtlzFunc(&module, resultType); }) .Case([&](math::IPowIOp op) { + if (!isConvertible(op)) + return; + Type resultType = getElementTypeOrSelf(op.getResult().getType()); // Generate the software implementation of this operation, @@ -873,9 +883,12 @@ void ConvertMathToFuncsPass::runOnOperation() { func::FuncDialect, scf::SCFDialect, vector::VectorDialect>(); - target.addIllegalOp(); - if (convertCtlz) - target.addIllegalOp(); + target.addDynamicallyLegalOp( + [this](math::IPowIOp op) { return !isConvertible(op); }); + if (convertCtlz) { + target.addDynamicallyLegalOp( + [this](math::CountLeadingZerosOp op) { return !isConvertible(op); }); + } target.addDynamicallyLegalOp( [this](math::FPowIOp op) { return !isFPowIConvertible(op); }); if (failed(applyPartialConversion(module, target, std::move(patterns)))) diff --git a/mlir/test/Conversion/MathToFuncs/ctlz.mlir b/mlir/test/Conversion/MathToFuncs/ctlz.mlir index 4e262417d6a959d..b7ef0a8928912da 100644 --- a/mlir/test/Conversion/MathToFuncs/ctlz.mlir +++ 
b/mlir/test/Conversion/MathToFuncs/ctlz.mlir @@ -91,3 +91,13 @@ func.func @main(%arg0: i8) { func.return } +// ----- + +// Check that index is not converted + +// CHECK-LABEL: func.func @ctlz_index +// CHECK: math.ctlz +func.func @ctlz_index(%arg0: index) { + %0 = math.ctlz %arg0 : index + func.return +} diff --git a/mlir/test/Conversion/MathToFuncs/ipowi.mlir b/mlir/test/Conversion/MathToFuncs/ipowi.mlir index e464e9ca9564fca..2702a1e22e621de 100644 --- a/mlir/test/Conversion/MathToFuncs/ipowi.mlir +++ b/mlir/test/Conversion/MathToFuncs/ipowi.mlir @@ -170,3 +170,14 @@ func.func @ipowi_vec(%arg0: vector<2x3xi64>, %arg1: vector<2x3xi64>) { %0 = math.ipowi %arg0, %arg1 : vector<2x3xi64> func.return } + +// ----- + +// Check that index is not converted + +// CHECK-LABEL: func.func @ipowi_index +// CHECK: math.ipowi +func.func @ipowi_index(%arg0: index, %arg1: index) { + %0 = math.ipowi %arg0, %arg1 : index + func.return +} From 39ac64c1c0fc61a476aa22c53e6977608ead03cf Mon Sep 17 00:00:00 2001 From: donald chen Date: Mon, 28 Oct 2024 10:14:44 +0800 Subject: [PATCH 126/425] [mlir][Arith] ValueBoundsInterface: speedup arith.select (#113531) When calculating value bounds in the arith.select op , the compare function is invoked to compare trueValue and falseValue. This function rebuilds constraints, resulting in repeated computations of value bounds. In large-scale programs, this redundancy significantly impacts compilation time. 
--- .../Arith/IR/ValueBoundsOpInterfaceImpl.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp index 7cfcc4180539c2b..6de151594e3e9c6 100644 --- a/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp @@ -107,9 +107,10 @@ struct SelectOpInterface // If trueValue <= falseValue: // * result <= falseValue // * result >= trueValue - if (cstr.compare(/*lhs=*/{trueValue, dim}, - ValueBoundsConstraintSet::ComparisonOperator::LE, - /*rhs=*/{falseValue, dim})) { + if (cstr.populateAndCompare( + /*lhs=*/{trueValue, dim}, + ValueBoundsConstraintSet::ComparisonOperator::LE, + /*rhs=*/{falseValue, dim})) { if (dim) { cstr.bound(value)[*dim] >= cstr.getExpr(trueValue, dim); cstr.bound(value)[*dim] <= cstr.getExpr(falseValue, dim); @@ -121,9 +122,10 @@ struct SelectOpInterface // If falseValue <= trueValue: // * result <= trueValue // * result >= falseValue - if (cstr.compare(/*lhs=*/{falseValue, dim}, - ValueBoundsConstraintSet::ComparisonOperator::LE, - /*rhs=*/{trueValue, dim})) { + if (cstr.populateAndCompare( + /*lhs=*/{falseValue, dim}, + ValueBoundsConstraintSet::ComparisonOperator::LE, + /*rhs=*/{trueValue, dim})) { if (dim) { cstr.bound(value)[*dim] >= cstr.getExpr(falseValue, dim); cstr.bound(value)[*dim] <= cstr.getExpr(trueValue, dim); From 5aa1275d03b679f45f47f29f206292f663afed83 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Mon, 28 Oct 2024 10:46:16 +0800 Subject: [PATCH 127/425] [X86] Support SM4 EVEX version intrinsics/instructions. 
(#113402) Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368 --- clang/docs/ReleaseNotes.rst | 4 + clang/include/clang/Basic/BuiltinsX86.def | 4 + clang/lib/Headers/CMakeLists.txt | 1 + clang/lib/Headers/immintrin.h | 5 + clang/lib/Headers/sm4evexintrin.h | 32 +++ clang/test/CodeGen/X86/sm4-evex-builtins.c | 19 ++ llvm/docs/ReleaseNotes.md | 2 + llvm/include/llvm/IR/IntrinsicsX86.td | 10 + llvm/lib/Target/X86/X86InstrAVX10.td | 14 ++ llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll | 64 +++++ llvm/test/MC/Disassembler/X86/sm4-evex-32.txt | 170 +++++++++++++ llvm/test/MC/Disassembler/X86/sm4-evex-64.txt | 170 +++++++++++++ llvm/test/MC/X86/sm4-evex-32-att.s | 169 +++++++++++++ llvm/test/MC/X86/sm4-evex-32-intel.s | 169 +++++++++++++ llvm/test/MC/X86/sm4-evex-64-att.s | 224 ++++++++++++++++++ llvm/test/MC/X86/sm4-evex-64-intel.s | 169 +++++++++++++ llvm/test/TableGen/x86-fold-tables.inc | 6 + 17 files changed, 1232 insertions(+) create mode 100644 clang/lib/Headers/sm4evexintrin.h create mode 100644 clang/test/CodeGen/X86/sm4-evex-builtins.c create mode 100644 llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll create mode 100644 llvm/test/MC/Disassembler/X86/sm4-evex-32.txt create mode 100644 llvm/test/MC/Disassembler/X86/sm4-evex-64.txt create mode 100644 llvm/test/MC/X86/sm4-evex-32-att.s create mode 100644 llvm/test/MC/X86/sm4-evex-32-intel.s create mode 100644 llvm/test/MC/X86/sm4-evex-64-att.s create mode 100644 llvm/test/MC/X86/sm4-evex-64-intel.s diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6a95337815174bc..31ee4f7e516feda 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -628,6 +628,10 @@ X86 Support * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and ``*_(mask(z)))_minmax_s[s|d|h]``. +- Supported intrinsics for ``SM4 and AVX10.2``. + * Supported SM4 intrinsics of ``_mm512_sm4key4_epi32`` and + ``_mm512_sm4rnds4_epi32``. 
+ - All intrinsics in adcintrin.h can now be used in constant expressions. - All intrinsics in adxintrin.h can now be used in constant expressions. diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 4c6b22cca421cab..4486eb73a11fa6a 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4") TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4") TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4") +// SM4_EVEX +TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4") +TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4") + // AVX10 MINMAX TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256") TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256") diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt index e97953d87a2ff97..0211d1870b30a07 100644 --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -243,6 +243,7 @@ set(x86_files shaintrin.h sm3intrin.h sm4intrin.h + sm4evexintrin.h smmintrin.h tbmintrin.h tmmintrin.h diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h index 5f296d0a3324d00..65ad72bc479f49a 100644 --- a/clang/lib/Headers/immintrin.h +++ b/clang/lib/Headers/immintrin.h @@ -677,6 +677,11 @@ _storebe_i64(void * __P, long long __D) { #include #endif +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AVX10_2_512__) && defined(__SM4__)) +#include +#endif + #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) #include #endif diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h new file mode 100644 index 000000000000000..f6ae0037baea033 
--- /dev/null +++ b/clang/lib/Headers/sm4evexintrin.h @@ -0,0 +1,32 @@ +/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifndef __SM4EVEXINTRIN_H +#define __SM4EVEXINTRIN_H + +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("sm4,avx10.2-512"), __min_vector_width__(512))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sm4key4_epi32(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif // __SM4EVEXINTRIN_H diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c new file mode 100644 index 000000000000000..0e54bd008d4fb0e --- /dev/null +++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \ +// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \ +// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include +#include + +__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_sm4key4_epi32( + // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + return _mm512_sm4key4_epi32(__A, __B); +} + 
+__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_sm4rnds4_epi32( + // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + return _mm512_sm4rnds4_epi32(__A, __B); +} diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 3cac3e57344dc32..7c7e687e94749e4 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -219,6 +219,8 @@ Changes to the X86 Backend * Supported instructions of `MOVRS AND AVX10.2` +* Supported ISA of `SM4(EVEX)`. + Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index d0083017fb93836..0ecca157077fdc1 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -6099,6 +6099,11 @@ let TargetPrefix = "x86" in { DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_vsm4key4512 + : ClangBuiltin<"__builtin_ia32_vsm4key4512">, + DefaultAttrsIntrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; def int_x86_vsm4rnds4128 : ClangBuiltin<"__builtin_ia32_vsm4rnds4128">, DefaultAttrsIntrinsic<[llvm_v4i32_ty], @@ -6109,6 +6114,11 @@ let TargetPrefix = "x86" in { DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_vsm4rnds4512 + : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">, + DefaultAttrsIntrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; } //===----------------------------------------------------------------------===// // RAO-INT intrinsics diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 9ef2debb57fa007..4d64eb776e09cef 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1675,3 +1675,17 @@ defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>, T_MAP5, XS, EVEX_CD8<32, 
CD8VF>, Sched<[WriteVecLoad]>; defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>, T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>; + +// SM4(EVEX) +multiclass avx10_sm4_base { + // SM4_Base is in X86InstrSSE.td. + let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in { + defm Z128 : SM4_Base, EVEX_V128; + defm Z256 : SM4_Base, EVEX_V256; + } + let Predicates = [HasSM4, HasAVX10_2_512] in + defm Z : SM4_Base, EVEX_V512; +} + +defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV; +defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV; diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll new file mode 100644 index 000000000000000..825a11d66cd4523 --- /dev/null +++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s + +define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B) + ret <4 x i32> %ret +} +declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B) + +define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4256: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7e,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B) + ret <8 x i32> %ret +} 
+declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B) + +define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4key4512: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B) + ret <16 x i32> %ret +} +declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B) + +define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4128: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) + ret <4 x i32> %ret +} +declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) + +define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4256: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7f,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) + ret <8 x i32> %ret +} +declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) + +define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) { +; CHECK-LABEL: test_int_x86_vsm4rnds4512: +; CHECK: # %bb.0: +; CHECK-NEXT: vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) + ret <16 x i32> %ret +} +declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) + diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt 
b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt new file mode 100644 index 000000000000000..c1cb271a967b13e --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vsm4key4 %zmm4, %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmm4 +0x62,0xf2,0x66,0x48,0xda,0xd4 + +# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%eax), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax] +0x62,0xf2,0x66,0x48,0xda,0x10 + +# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf2,0x66,0x48,0xda,0x51,0x7f + +# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2 +# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192] +0x62,0xf2,0x66,0x48,0xda,0x52,0x80 + +# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmm4 +0x62,0xf2,0x67,0x48,0xda,0xd4 + +# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax] 
+0x62,0xf2,0x67,0x48,0xda,0x10 + +# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128] +0x62,0xf2,0x67,0x48,0xda,0x51,0x7f + +# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2 +# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192] +0x62,0xf2,0x67,0x48,0xda,0x52,0x80 + +# ATT: vsm4key4 %ymm4, %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymm4 +0x62,0xf2,0x66,0x28,0xda,0xd4 + +# ATT: vsm4key4 %xmm4, %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmm4 +0x62,0xf2,0x66,0x08,0xda,0xd4 + +# ATT: vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%eax), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [eax] +0x62,0xf2,0x66,0x28,0xda,0x10 + +# ATT: vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4key4 4064(%ecx), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf2,0x66,0x28,0xda,0x51,0x7f + +# ATT: vsm4key4 -4096(%edx), %ymm3, %ymm2 +# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096] +0x62,0xf2,0x66,0x28,0xda,0x52,0x80 + +# ATT: vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%eax), %xmm3, %xmm2 +# INTEL: vsm4key4 
xmm2, xmm3, xmmword ptr [eax] +0x62,0xf2,0x66,0x08,0xda,0x10 + +# ATT: vsm4key4 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4key4 2032(%ecx), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf2,0x66,0x08,0xda,0x51,0x7f + +# ATT: vsm4key4 -2048(%edx), %xmm3, %xmm2 +# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048] +0x62,0xf2,0x66,0x08,0xda,0x52,0x80 + +# ATT: vsm4rnds4 %ymm4, %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymm4 +0x62,0xf2,0x67,0x28,0xda,0xd4 + +# ATT: vsm4rnds4 %xmm4, %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmm4 +0x62,0xf2,0x67,0x08,0xda,0xd4 + +# ATT: vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%eax), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [eax] +0x62,0xf2,0x67,0x28,0xda,0x10 + +# ATT: vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4rnds4 4064(%ecx), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064] +0x62,0xf2,0x67,0x28,0xda,0x51,0x7f + +# ATT: vsm4rnds4 -4096(%edx), %ymm3, %ymm2 +# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096] +0x62,0xf2,0x67,0x28,0xda,0x52,0x80 + +# ATT: vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] +0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00 + +# ATT: 
vsm4rnds4 (%eax), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [eax] +0x62,0xf2,0x67,0x08,0xda,0x10 + +# ATT: vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512] +0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4rnds4 2032(%ecx), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032] +0x62,0xf2,0x67,0x08,0xda,0x51,0x7f + +# ATT: vsm4rnds4 -2048(%edx), %xmm3, %xmm2 +# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048] +0x62,0xf2,0x67,0x08,0xda,0x52,0x80 diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt new file mode 100644 index 000000000000000..f89f4b5a8c0fb8f --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt @@ -0,0 +1,170 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: vsm4key4 %zmm24, %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmm24 +0x62,0x82,0x46,0x40,0xda,0xf0 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rip] +0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4key4 8128(%rcx), %zmm23, %zmm22 +# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe2,0x46,0x40,0xda,0x71,0x7f + +# ATT: vsm4key4 -8192(%rdx), %zmm23, %zmm22 +# INTEL: 
vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192] +0x62,0xe2,0x46,0x40,0xda,0x72,0x80 + +# ATT: vsm4rnds4 %zmm24, %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmm24 +0x62,0x82,0x47,0x40,0xda,0xf0 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip] +0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff + +# ATT: vsm4rnds4 8128(%rcx), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128] +0x62,0xe2,0x47,0x40,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22 +# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192] +0x62,0xe2,0x47,0x40,0xda,0x72,0x80 + +# ATT: vsm4key4 %ymm24, %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymm24 +0x62,0x82,0x46,0x20,0xda,0xf0 + +# ATT: vsm4key4 %xmm24, %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmm24 +0x62,0x82,0x46,0x00,0xda,0xf0 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rip] +0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] 
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4key4 4064(%rcx), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe2,0x46,0x20,0xda,0x71,0x7f + +# ATT: vsm4key4 -4096(%rdx), %ymm23, %ymm22 +# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096] +0x62,0xe2,0x46,0x20,0xda,0x72,0x80 + +# ATT: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4key4 (%rip), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rip] +0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4key4 2032(%rcx), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe2,0x46,0x00,0xda,0x71,0x7f + +# ATT: vsm4key4 -2048(%rdx), %xmm23, %xmm22 +# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048] +0x62,0xe2,0x46,0x00,0xda,0x72,0x80 + +# ATT: vsm4rnds4 %ymm24, %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymm24 +0x62,0x82,0x47,0x20,0xda,0xf0 + +# ATT: vsm4rnds4 %xmm24, %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmm24 +0x62,0x82,0x47,0x00,0xda,0xf0 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip] 
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff + +# ATT: vsm4rnds4 4064(%rcx), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064] +0x62,0xe2,0x47,0x20,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22 +# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096] +0x62,0xe2,0x47,0x20,0xda,0x72,0x80 + +# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10 + +# ATT: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00 + +# ATT: vsm4rnds4 (%rip), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip] +0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00 + +# ATT: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff + +# ATT: vsm4rnds4 2032(%rcx), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032] +0x62,0xe2,0x47,0x00,0xda,0x71,0x7f + +# ATT: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22 +# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048] +0x62,0xe2,0x47,0x00,0xda,0x72,0x80 diff --git a/llvm/test/MC/X86/sm4-evex-32-att.s b/llvm/test/MC/X86/sm4-evex-32-att.s new file mode 100644 index 000000000000000..de10d95ac74d7ba --- /dev/null +++ b/llvm/test/MC/X86/sm4-evex-32-att.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vsm4key4 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4] + vsm4key4 %zmm4, %zmm3, %zmm2 + +// CHECK: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: 
[0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2 + +// CHECK: vsm4key4 (%eax), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10] + vsm4key4 (%eax), %zmm3, %zmm2 + +// CHECK: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vsm4key4 8128(%ecx), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f] + vsm4key4 8128(%ecx), %zmm3, %zmm2 + +// CHECK: vsm4key4 -8192(%edx), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80] + vsm4key4 -8192(%edx), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 %zmm4, %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4] + vsm4rnds4 %zmm4, %zmm3, %zmm2 + +// CHECK: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 (%eax), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10] + vsm4rnds4 (%eax), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 8128(%ecx), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f] + vsm4rnds4 8128(%ecx), %zmm3, %zmm2 + +// CHECK: vsm4rnds4 -8192(%edx), %zmm3, %zmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80] + vsm4rnds4 -8192(%edx), %zmm3, %zmm2 + +// CHECK: {evex} vsm4key4 %ymm4, %ymm3, %ymm2 +// CHECK: 
encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4] + {evex} vsm4key4 %ymm4, %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4] + {evex} vsm4key4 %xmm4, %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 (%eax), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10] + {evex} vsm4key4 (%eax), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff] + {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f] + {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80] + {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2 + +// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 (%eax), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10] + {evex} vsm4key4 (%eax), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff] + {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 2032(%ecx), %xmm3, 
%xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f] + {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2 + +// CHECK: {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80] + {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4] + {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4] + {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 (%eax), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10] + {evex} vsm4rnds4 (%eax), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff] + {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f] + {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80] + {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2 + +// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2 + +// CHECK: {evex} 
vsm4rnds4 (%eax), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10] + {evex} vsm4rnds4 (%eax), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff] + {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f] + {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2 + +// CHECK: {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80] + {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2 \ No newline at end of file diff --git a/llvm/test/MC/X86/sm4-evex-32-intel.s b/llvm/test/MC/X86/sm4-evex-32-intel.s new file mode 100644 index 000000000000000..812fdb13f80913e --- /dev/null +++ b/llvm/test/MC/X86/sm4-evex-32-intel.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vsm4key4 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4] + vsm4key4 zmm2, zmm3, zmm4 + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10] + vsm4key4 zmm2, zmm3, zmmword ptr [eax] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f] + vsm4key4 zmm2, 
zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192] +// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80] + vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmm4 +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4] + vsm4rnds4 zmm2, zmm3, zmm4 + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10] + vsm4rnds4 zmm2, zmm3, zmmword ptr [eax] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff] + vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f] + vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128] + +// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192] +// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80] + vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4] + {evex} vsm4key4 ymm2, ymm3, ymm4 + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4] + {evex} vsm4key4 xmm2, xmm3, xmm4 + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: 
[0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096] +// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80] + {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048] +// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80] + {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048] + 
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymm4 +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4] + {evex} vsm4rnds4 ymm2, ymm3, ymm4 + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmm4 +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4] + {evex} vsm4rnds4 xmm2, xmm3, xmm4 + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064] + +// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096] +// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80] + {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10] + 
{evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032] + +// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048] +// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80] + {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048] diff --git a/llvm/test/MC/X86/sm4-evex-64-att.s b/llvm/test/MC/X86/sm4-evex-64-att.s new file mode 100644 index 000000000000000..389a29b11897954 --- /dev/null +++ b/llvm/test/MC/X86/sm4-evex-64-att.s @@ -0,0 +1,224 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: vsm4key4 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0] + vsm4key4 %zmm24, %zmm23, %zmm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22 + +// CHECK: vsm4key4 (%rip), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %zmm23, %zmm22 + +// CHECK: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vsm4key4 8128(%rcx), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f] + vsm4key4 8128(%rcx), %zmm23, %zmm22 + +// CHECK: vsm4key4 -8192(%rdx), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80] + vsm4key4 
-8192(%rdx), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 %zmm24, %zmm23, %zmm22 +// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0] + vsm4rnds4 %zmm24, %zmm23, %zmm22 + +// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 (%rip), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 (%rip), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 8128(%rcx), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f] + vsm4rnds4 8128(%rcx), %zmm23, %zmm22 + +// CHECK: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80] + vsm4rnds4 -8192(%rdx), %zmm23, %zmm22 + +// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0] + vsm4key4 %ymm24, %ymm23, %ymm22 + +// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0] + vsm4key4 %xmm24, %xmm23, %xmm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 + +// CHECK: vsm4key4 (%rip), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %ymm23, %ymm22 + +// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: 
encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f] + vsm4key4 4064(%rcx), %ymm23, %ymm22 + +// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80] + vsm4key4 -4096(%rdx), %ymm23, %ymm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 + +// CHECK: vsm4key4 (%rip), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %xmm23, %xmm22 + +// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f] + vsm4key4 2032(%rcx), %xmm23, %xmm22 + +// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80] + vsm4key4 -2048(%rdx), %xmm23, %xmm22 +// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0] + vsm4key4 %ymm24, %ymm23, %ymm22 + +// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0] + vsm4key4 %xmm24, %xmm23, %xmm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %ymm23, 
%ymm22 + +// CHECK: vsm4key4 (%rip), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %ymm23, %ymm22 + +// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f] + vsm4key4 4064(%rcx), %ymm23, %ymm22 + +// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80] + vsm4key4 -4096(%rdx), %ymm23, %ymm22 + +// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22 + +// CHECK: vsm4key4 (%rip), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 (%rip), %xmm23, %xmm22 + +// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4key4 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f] + vsm4key4 2032(%rcx), %xmm23, %xmm22 + +// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80] + vsm4key4 -2048(%rdx), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 %ymm24, %ymm23, %ymm22 +// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0] + vsm4rnds4 %ymm24, %ymm23, %ymm22 + +// CHECK: vsm4rnds4 %xmm24, %xmm23, %xmm22 +// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0] + vsm4rnds4 %xmm24, %xmm23, %xmm22 + +// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22 +// CHECK: encoding: 
[0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 (%rip), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 (%rip), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 4064(%rcx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f] + vsm4rnds4 4064(%rcx), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80] + vsm4rnds4 -4096(%rdx), %ymm23, %ymm22 + +// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 (%rip), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 (%rip), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 2032(%rcx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f] + vsm4rnds4 2032(%rcx), %xmm23, %xmm22 + +// CHECK: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80] + vsm4rnds4 -2048(%rdx), %xmm23, %xmm22 diff --git a/llvm/test/MC/X86/sm4-evex-64-intel.s b/llvm/test/MC/X86/sm4-evex-64-intel.s new file 
mode 100644 index 000000000000000..3cc18cf4178ed88 --- /dev/null +++ b/llvm/test/MC/X86/sm4-evex-64-intel.s @@ -0,0 +1,169 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: vsm4key4 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0] + vsm4key4 zmm22, zmm23, zmm24 + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 zmm22, zmm23, zmmword ptr [rip] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f] + vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192] +// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80] + vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmm24 +// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0] + vsm4rnds4 zmm22, zmm23, zmm24 + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 
zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 zmm22, zmm23, zmmword ptr [rip] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff] + vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128] +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f] + vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128] + +// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192] +// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80] + vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192] + +// CHECK: vsm4key4 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0] + vsm4key4 ymm22, ymm23, ymm24 + +// CHECK: vsm4key4 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0] + vsm4key4 xmm22, xmm23, xmm24 + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 ymm22, ymm23, ymmword ptr [rip] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f] + vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vsm4key4 ymm22, ymm23, ymmword 
ptr [rdx - 4096] +// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80] + vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4key4 xmm22, xmm23, xmmword ptr [rip] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f] + vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80] + vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymm24 +// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0] + vsm4rnds4 ymm22, ymm23, ymm24 + +// CHECK: vsm4rnds4 xmm22, xmm23, xmm24 +// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0] + vsm4rnds4 xmm22, xmm23, xmm24 + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip] +// CHECK: encoding: 
[0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 ymm22, ymm23, ymmword ptr [rip] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff] + vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064] +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f] + vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064] + +// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096] +// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80] + vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] +// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10] + vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] +// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00] + vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip] +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00] + vsm4rnds4 xmm22, xmm23, xmmword ptr [rip] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512] +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff] + vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032] +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f] + vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032] + +// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048] +// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80] + vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048] diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 85d9b02ac0cbf1c..43c206fa0af698e 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -4113,8 
+4113,14 @@ static const X86FoldTableEntry Table2[] = { {X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0}, {X86::VSHUFPSrri, X86::VSHUFPSrmi, 0}, {X86::VSM4KEY4Yrr, X86::VSM4KEY4Yrm, 0}, + {X86::VSM4KEY4Z128rr, X86::VSM4KEY4Z128rm, 0}, + {X86::VSM4KEY4Z256rr, X86::VSM4KEY4Z256rm, 0}, + {X86::VSM4KEY4Zrr, X86::VSM4KEY4Zrm, 0}, {X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0}, {X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0}, + {X86::VSM4RNDS4Z128rr, X86::VSM4RNDS4Z128rm, 0}, + {X86::VSM4RNDS4Z256rr, X86::VSM4RNDS4Z256rm, 0}, + {X86::VSM4RNDS4Zrr, X86::VSM4RNDS4Zrm, 0}, {X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0}, {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0}, {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0}, From a1d31caa8c53082d12f580122dcf2b2ff8285e78 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Mon, 28 Oct 2024 11:36:37 +0800 Subject: [PATCH 128/425] [clang-tidy] fix false positive for implicit conversion of comparison result in C23 (#113639) Fixed #111013 bool will be builtin type in C23 but comparison result in C is still int. It is no need to change this kind of implicit cast to explicit cast. 
--- .../readability/ImplicitBoolConversionCheck.cpp | 8 ++++++++ clang-tools-extra/docs/ReleaseNotes.rst | 3 ++- .../checkers/readability/implicit-bool-conversion.c | 9 +++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp index 968a4a55a6d7988..f9fd1d903e231e1 100644 --- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp @@ -10,6 +10,7 @@ #include "../utils/FixItHintUtils.h" #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Lex/Lexer.h" #include "clang/Tooling/FixIt.h" #include @@ -26,6 +27,8 @@ AST_MATCHER(Stmt, isMacroExpansion) { return SM.isMacroBodyExpansion(Loc) || SM.isMacroArgExpansion(Loc); } +AST_MATCHER(Stmt, isC23) { return Finder->getASTContext().getLangOpts().C23; } + bool isNULLMacroExpansion(const Stmt *Statement, ASTContext &Context) { SourceManager &SM = Context.getSourceManager(); const LangOptions &LO = Context.getLangOpts(); @@ -298,6 +301,11 @@ void ImplicitBoolConversionCheck::registerMatchers(MatchFinder *Finder) { hasCastKind(CK_FloatingToBoolean), hasCastKind(CK_PointerToBoolean), hasCastKind(CK_MemberPointerToBoolean)), + // Exclude cases of C23 comparison result. 
+ unless(allOf(isC23(), + hasSourceExpression(ignoringParens( + binaryOperator(hasAnyOperatorName( + ">", ">=", "==", "!=", "<", "<=")))))), // Exclude case of using if or while statements with variable // declaration, e.g.: // if (int var = functionCall()) {} diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 876689c40fcdb21..4cc4c2146d7e330 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -249,7 +249,8 @@ Changes in existing checks - Improved :doc:`readability-implicit-bool-conversion ` check by adding the option `UseUpperCaseLiteralSuffix` to select the - case of the literal suffix in fixes. + case of the literal suffix in fixes and fixing false positive for implicit + conversion of comparison result in C23. - Improved :doc:`readability-redundant-smartptr-get ` check to diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c index f3dc32c10d640ae..0b231d10adf8fc6 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c @@ -304,6 +304,15 @@ void implicitConversionToBoolFromUnaryMinusAndZeroLiterals() { // CHECK-FIXES: functionTakingBool((-0.0) != 0.0); } +void ignoreImplicitCastToBoolForComparisonResult() { + bool boolFromComparison0 = 1 != 0; + bool boolFromComparison1 = 1 == 0; + bool boolFromComparison2 = 1 > 0; + bool boolFromComparison3 = 1 >= 0; + bool boolFromComparison4 = 1 < 0; + bool boolFromComparison5 = 1 <= 0; +} + void ignoreExplicitCastsToBool() { int integer = 10; bool boolComingFromInt = (bool)integer; From c876d719ef5b10128eca6d8677068248b4831017 Mon Sep 17 00:00:00 2001 From: antangelo Date: Mon, 28 Oct 2024 00:25:56 -0400 Subject: [PATCH 129/425] [clang][NFC] Accept const NamedDecl pointer for 
getDepthAndIndex (#113857) --- clang/include/clang/Sema/SemaInternal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Sema/SemaInternal.h b/clang/include/clang/Sema/SemaInternal.h index d994d1819b44237..41d05b2bfb078eb 100644 --- a/clang/include/clang/Sema/SemaInternal.h +++ b/clang/include/clang/Sema/SemaInternal.h @@ -58,7 +58,7 @@ inline InheritableAttr *getDLLAttr(Decl *D) { } /// Retrieve the depth and index of a template parameter. -inline std::pair getDepthAndIndex(NamedDecl *ND) { +inline std::pair getDepthAndIndex(const NamedDecl *ND) { if (const auto *TTP = dyn_cast(ND)) return std::make_pair(TTP->getDepth(), TTP->getIndex()); From 99b2feadcc770ea52f71dda6cf027373a6931c6c Mon Sep 17 00:00:00 2001 From: ZhangYin Date: Mon, 28 Oct 2024 12:40:09 +0800 Subject: [PATCH 130/425] [libc++] temporarily mark XFAIL for the armv7-unknown-linux-gnueabihf with simd unary test to pass the CI (#113641) --- .../test/std/experimental/simd/simd.class/simd_unary.pass.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libcxx/test/std/experimental/simd/simd.class/simd_unary.pass.cpp b/libcxx/test/std/experimental/simd/simd.class/simd_unary.pass.cpp index f205ac971e5f08f..8cc9924a3cdae14 100644 --- a/libcxx/test/std/experimental/simd/simd.class/simd_unary.pass.cpp +++ b/libcxx/test/std/experimental/simd/simd.class/simd_unary.pass.cpp @@ -16,6 +16,10 @@ // clang: error: unable to execute command: Illegal instruction: 4 // XFAIL: target=x86_64-apple-macosx13.7 +// FIXME: The following issue occurs on Windows to Armv7 Ubuntu Linux: +// Assertion failed: N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type" +// XFAIL: target=armv7-unknown-linux-gnueabihf + // // // [simd.class] From d3f70db51cbc0876937d404e96fbda04df793bd4 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Mon, 28 Oct 2024 12:59:51 +0800 Subject: [PATCH 131/425] [X86][MC] Support instructions of MSR_IMM (#113524) Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368 --- 
llvm/docs/ReleaseNotes.md | 2 ++ llvm/lib/Target/X86/X86InstrSystem.td | 8 +++++- llvm/test/MC/Disassembler/X86/apx/msr-imm.txt | 18 +++++++++++++ llvm/test/MC/Disassembler/X86/msrimm-64.txt | 10 ++++++++ llvm/test/MC/X86/apx/msrimm-att.s | 25 +++++++++++++++++++ llvm/test/MC/X86/apx/msrimm-intel.s | 21 ++++++++++++++++ llvm/test/MC/X86/msrimm-64-att.s | 14 +++++++++++ llvm/test/MC/X86/msrimm-64-intel.s | 10 ++++++++ 8 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/Disassembler/X86/apx/msr-imm.txt create mode 100644 llvm/test/MC/Disassembler/X86/msrimm-64.txt create mode 100644 llvm/test/MC/X86/apx/msrimm-att.s create mode 100644 llvm/test/MC/X86/apx/msrimm-intel.s create mode 100644 llvm/test/MC/X86/msrimm-64-att.s create mode 100644 llvm/test/MC/X86/msrimm-64-intel.s diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 7c7e687e94749e4..2580f09be3ad77b 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -221,6 +221,8 @@ Changes to the X86 Backend * Supported ISA of `SM4(EVEX)`. +* Supported ISA of `MSR_IMM`. 
+ Changes to the OCaml bindings ----------------------------- diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td index e1573b37d4dc265..dc701f1afc915f7 100644 --- a/llvm/lib/Target/X86/X86InstrSystem.td +++ b/llvm/lib/Target/X86/X86InstrSystem.td @@ -466,7 +466,10 @@ multiclass Urdwrmsr { "urdmsr\t{$imm, $dst|$dst, $imm}", [(set GR64:$dst, (int_x86_urdmsr i64immSExt32_su:$imm))]>, T_MAP7, VEX, XD, NoCD8; -} + def RDMSRri#suffix : Ii32<0xf6, MRM0r, (outs GR64:$dst), (ins i64i32imm:$imm), + "rdmsr\t{$imm, $dst|$dst, $imm}", []>, + T_MAP7, VEX, XD, NoCD8; + } let mayStore = 1 in { let OpMap = rrmap in def UWRMSRrr#suffix : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), @@ -476,6 +479,9 @@ multiclass Urdwrmsr { "uwrmsr\t{$src, $imm|$imm, $src}", [(int_x86_uwrmsr i64immSExt32_su:$imm, GR64:$src)]>, T_MAP7, VEX, XS, NoCD8; + def WRMSRNSir#suffix : Ii32<0xf6, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm), + "wrmsrns\t{$src, $imm|$imm, $src}", + []>, T_MAP7, VEX, XS, NoCD8; } } diff --git a/llvm/test/MC/Disassembler/X86/apx/msr-imm.txt b/llvm/test/MC/Disassembler/X86/apx/msr-imm.txt new file mode 100644 index 000000000000000..63465bb7070ea8e --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/apx/msr-imm.txt @@ -0,0 +1,18 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: rdmsr $123, %r9 +# INTEL: rdmsr r9, 123 +0x62,0xd7,0x7f,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00 + +# ATT: rdmsr $123, %r19 +# INTEL: rdmsr r19, 123 +0x62,0xff,0x7f,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00 + +# ATT: wrmsrns %r9, $123 +# INTEL: wrmsrns 123, r9 +0x62,0xd7,0x7e,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00 + +# ATT: wrmsrns %r19, $123 +# INTEL: wrmsrns 123, r19 +0x62,0xff,0x7e,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/X86/msrimm-64.txt 
b/llvm/test/MC/Disassembler/X86/msrimm-64.txt new file mode 100644 index 000000000000000..625d70d739cd349 --- /dev/null +++ b/llvm/test/MC/Disassembler/X86/msrimm-64.txt @@ -0,0 +1,10 @@ +# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT +# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL + +# ATT: rdmsr $123, %r9 +# INTEL: rdmsr r9, 123 +0xc4,0xc7,0x7b,0xf6,0xc1,0x7b,0x00,0x00,0x00 + +# ATT: wrmsrns %r9, $123 +# INTEL: wrmsrns 123, r9 +0xc4,0xc7,0x7a,0xf6,0xc1,0x7b,0x00,0x00,0x00 diff --git a/llvm/test/MC/X86/apx/msrimm-att.s b/llvm/test/MC/X86/apx/msrimm-att.s new file mode 100644 index 000000000000000..e4259f19cb7be45 --- /dev/null +++ b/llvm/test/MC/X86/apx/msrimm-att.s @@ -0,0 +1,25 @@ +# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +# ERROR-COUNT-4: error: +# ERROR-NOT: error: + +## rdmsr + +// CHECK: {evex} rdmsr $123, %r9 +// CHECK: encoding: [0x62,0xd7,0x7f,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00] + {evex} rdmsr $123, %r9 + +// CHECK: rdmsr $123, %r19 +// CHECK: encoding: [0x62,0xff,0x7f,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00] + rdmsr $123, %r19 + +## wrmsrns + +# CHECK: {evex} wrmsrns %r9, $123 +# CHECK: encoding: [0x62,0xd7,0x7e,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00] + {evex} wrmsrns %r9, $123 + +# CHECK: wrmsrns %r19, $123 +# CHECK: encoding: [0x62,0xff,0x7e,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00] + wrmsrns %r19, $123 diff --git a/llvm/test/MC/X86/apx/msrimm-intel.s b/llvm/test/MC/X86/apx/msrimm-intel.s new file mode 100644 index 000000000000000..d7eab047dd0cf77 --- /dev/null +++ b/llvm/test/MC/X86/apx/msrimm-intel.s @@ -0,0 +1,21 @@ +# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +## urdmsr + +# CHECK: {evex} rdmsr r9, 123 +# CHECK: encoding: 
[0x62,0xd7,0x7f,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00] + {evex} rdmsr r9, 123 + +# CHECK: rdmsr r19, 123 +# CHECK: encoding: [0x62,0xff,0x7f,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00] + rdmsr r19, 123 + +## uwrmsr + +# CHECK: {evex} wrmsrns 123, r9 +# CHECK: encoding: [0x62,0xd7,0x7e,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00] + {evex} wrmsrns 123, r9 + +# CHECK: wrmsrns 123, r19 +# CHECK: encoding: [0x62,0xff,0x7e,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00] + wrmsrns 123, r19 diff --git a/llvm/test/MC/X86/msrimm-64-att.s b/llvm/test/MC/X86/msrimm-64-att.s new file mode 100644 index 000000000000000..e69eb7ff29a61e4 --- /dev/null +++ b/llvm/test/MC/X86/msrimm-64-att.s @@ -0,0 +1,14 @@ +// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s +// RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR + +// ERROR-COUNT-2: error: +// ERROR-NOT: error: + +// CHECK: rdmsr $123, %r9 +// CHECK: encoding: [0xc4,0xc7,0x7b,0xf6,0xc1,0x7b,0x00,0x00,0x00] + rdmsr $123, %r9 + +// CHECK: wrmsrns %r9, $123 +// CHECK: encoding: [0xc4,0xc7,0x7a,0xf6,0xc1,0x7b,0x00,0x00,0x00] + wrmsrns %r9, $123 + diff --git a/llvm/test/MC/X86/msrimm-64-intel.s b/llvm/test/MC/X86/msrimm-64-intel.s new file mode 100644 index 000000000000000..e1ae9c67912365b --- /dev/null +++ b/llvm/test/MC/X86/msrimm-64-intel.s @@ -0,0 +1,10 @@ +// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s + +// CHECK: rdmsr r9, 123 +// CHECK: encoding: [0xc4,0xc7,0x7b,0xf6,0xc1,0x7b,0x00,0x00,0x00] + rdmsr r9, 123 + +// CHECK: wrmsrns 123, r9 +// CHECK: encoding: [0xc4,0xc7,0x7a,0xf6,0xc1,0x7b,0x00,0x00,0x00] + wrmsrns 123, r9 + From 5d4a0d54b5269bad1410e6db957836fe98634069 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sun, 27 Oct 2024 23:51:20 +0000 Subject: [PATCH 132/425] [InstCombine] Teach takeLog2 about right shifts, truncation and bitwise-and We left some easy opportunities for further simplifications. log2(trunc(x)) is simply trunc(log2(x)). 
This is safe if we know that trunc is NUW because it means that the truncation didn't drop any bits. It is also safe if the caller is OK with zero as a possible answer. log2(x >>u y) is simply `log2(x) - y`. log2(x & y) is a funny one. It comes up when doing something like: ``` unsigned int f(unsigned int x, unsigned int y) { unsigned char a = 1u << x; return y / a; } ``` LLVM would canonicalize this to: ``` %shl = shl nuw i32 1, %x %conv1 = and i32 %shl, 255 %div = udiv i32 %y, %conv1 ``` In cases like these, we can ignore the mask entirely. This is equivalent to `y >> x`. --- .../InstCombine/InstCombineMulDivRem.cpp | 30 +++++++++++++ llvm/test/Transforms/InstCombine/div.ll | 43 +++++++++++++++++-- llvm/test/Transforms/InstCombine/shift.ll | 8 ++-- 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index f4f3644acfe5ea0..b9c165da906da4b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1427,6 +1427,18 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth, if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold)) return IfFold([&]() { return Builder.CreateZExt(LogX, Op->getType()); }); + // log2(trunc x) -> trunc log2(X) + // FIXME: Require one use? + if (match(Op, m_Trunc(m_Value(X)))) { + auto *TI = cast(Op); + if (AssumeNonZero || TI->hasNoUnsignedWrap()) + if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold)) + return IfFold([&]() { + return Builder.CreateTrunc(LogX, Op->getType(), "", + /*IsNUW=*/TI->hasNoUnsignedWrap()); + }); + } + // log2(X << Y) -> log2(X) + Y // FIXME: Require one use unless X is 1? 
if (match(Op, m_Shl(m_Value(X), m_Value(Y)))) { @@ -1437,6 +1449,24 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth, return IfFold([&]() { return Builder.CreateAdd(LogX, Y); }); } + // log2(X >>u Y) -> log2(X) - Y + // FIXME: Require one use? + if (match(Op, m_LShr(m_Value(X), m_Value(Y)))) { + auto *PEO = cast(Op); + if (AssumeNonZero || PEO->isExact()) + if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold)) + return IfFold([&]() { return Builder.CreateSub(LogX, Y); }); + } + + // log2(X & Y) -> either log2(X) or log2(Y) + // This requires `AssumeNonZero` as `X & Y` may be zero when X != Y. + if (AssumeNonZero && match(Op, m_And(m_Value(X), m_Value(Y)))) { + if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold)) + return IfFold([&]() { return LogX; }); + if (Value *LogY = takeLog2(Builder, Y, Depth, AssumeNonZero, DoFold)) + return IfFold([&]() { return LogY; }); + } + // log2(Cond ? X : Y) -> Cond ? log2(X) : log2(Y) // FIXME: Require one use? 
if (SelectInst *SI = dyn_cast(Op)) diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll index e8a25ff44d02966..a91c9bfc91c40d2 100644 --- a/llvm/test/Transforms/InstCombine/div.ll +++ b/llvm/test/Transforms/InstCombine/div.ll @@ -429,9 +429,8 @@ define <2 x i32> @test31(<2 x i32> %x) { define i32 @test32(i32 %a, i32 %b) { ; CHECK-LABEL: @test32( -; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, [[B:%.*]] -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL]], 2 -; CHECK-NEXT: [[DIV2:%.*]] = udiv i32 [[A:%.*]], [[DIV]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B:%.*]], -1 +; CHECK-NEXT: [[DIV2:%.*]] = lshr i32 [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret i32 [[DIV2]] ; %shl = shl i32 2, %b @@ -1832,3 +1831,41 @@ define i32 @fold_disjoint_or_over_udiv(i32 %x) { %r = udiv i32 %or, 9 ret i32 %r } + +define i8 @udiv_trunc_shl(i32 %x) { +; CHECK-LABEL: @udiv_trunc_shl( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[UDIV1:%.*]] = lshr i8 8, [[TMP1]] +; CHECK-NEXT: ret i8 [[UDIV1]] +; + %lshr = shl i32 1, %x + %trunc = trunc i32 %lshr to i8 + %div = udiv i8 8, %trunc + ret i8 %div +} + +define i32 @zext_udiv_trunc_lshr(i32 %x) { +; CHECK-LABEL: @zext_udiv_trunc_lshr( +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8 +; CHECK-NEXT: [[TMP2:%.*]] = sub i8 5, [[TMP1]] +; CHECK-NEXT: [[UDIV1:%.*]] = lshr i8 8, [[TMP2]] +; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i8 [[UDIV1]] to i32 +; CHECK-NEXT: ret i32 [[ZEXT]] +; + %lshr = lshr i32 32, %x + %trunc = trunc i32 %lshr to i8 + %div = udiv i8 8, %trunc + %zext = zext i8 %div to i32 + ret i32 %zext +} + +define i32 @udiv_and_shl(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @udiv_and_shl( +; CHECK-NEXT: [[DIV1:%.*]] = lshr i32 [[C:%.*]], [[A:%.*]] +; CHECK-NEXT: ret i32 [[DIV1]] +; + %shl = shl i32 1, %a + %and = and i32 %b, %shl + %div = udiv i32 %c, %and + ret i32 %div +} diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 
558f4ffbfcabe46..69f531e98f045b6 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -677,8 +677,8 @@ entry: define i32 @test42(i32 %a, i32 %b) { ; CHECK-LABEL: @test42( -; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 4096, [[B:%.*]] -; CHECK-NEXT: [[DIV2:%.*]] = udiv i32 [[A:%.*]], [[DIV]] +; CHECK-NEXT: [[TMP1:%.*]] = sub i32 12, [[B:%.*]] +; CHECK-NEXT: [[DIV2:%.*]] = lshr i32 [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret i32 [[DIV2]] ; %div = lshr i32 4096, %b ; must be exact otherwise we'd divide by zero @@ -688,8 +688,8 @@ define i32 @test42(i32 %a, i32 %b) { define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @test42vec( -; CHECK-NEXT: [[DIV:%.*]] = lshr exact <2 x i32> , [[B:%.*]] -; CHECK-NEXT: [[DIV2:%.*]] = udiv <2 x i32> [[A:%.*]], [[DIV]] +; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> , [[B:%.*]] +; CHECK-NEXT: [[DIV2:%.*]] = lshr <2 x i32> [[A:%.*]], [[TMP1]] ; CHECK-NEXT: ret <2 x i32> [[DIV2]] ; %div = lshr <2 x i32> , %b ; must be exact otherwise we'd divide by zero From 819abe412dd554303cb932d6ec2200b9b9ebdd78 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Mon, 28 Oct 2024 14:07:32 +0700 Subject: [PATCH 133/425] [Test] Fix usage of constrained intrinsics (#113523) Some tests contain errors in constrained intrinsic usage, such as missing or extra type parameters, wrong type parameter order, and other mistakes.
--------- Co-authored-by: Andy Kaylor --- llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll | 32 +++++------ .../CodeGen/AArch64/fp-intrinsics-fp16.ll | 16 +++--- .../CodeGen/AArch64/fp-intrinsics-vector.ll | 6 +- llvm/test/CodeGen/AArch64/fp-intrinsics.ll | 48 ++++++++-------- llvm/test/CodeGen/ARM/fp-intrinsics.ll | 56 +++++++++---------- .../ppcf128-constrained-fp-intrinsics.ll | 28 +++++----- .../CodeGen/RISCV/double-intrinsics-strict.ll | 4 +- .../CodeGen/RISCV/float-intrinsics-strict.ll | 4 +- .../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 6 +- llvm/test/CodeGen/X86/bfloat-constrained.ll | 18 +++--- .../CodeGen/X86/float-strict-powi-convert.ll | 4 +- .../Transforms/EarlyCSE/defaultfp-strictfp.ll | 20 +++---- .../Transforms/EarlyCSE/ebstrict-strictfp.ll | 12 ++-- .../Transforms/EarlyCSE/mixed-strictfp.ll | 20 +++---- .../Transforms/EarlyCSE/nonmixed-strictfp.ll | 20 +++---- .../Transforms/SCCP/strictfp-phis-fcmp.ll | 26 ++++----- .../Transforms/SCCP/strictfp-phis-fcmps.ll | 26 ++++----- 17 files changed, 173 insertions(+), 173 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll index 049098ab2ae97d8..40684b0f3a256b0 100644 --- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll +++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll @@ -131,7 +131,7 @@ define double @t1_strict(double %x) #0 { ; CHECK-NEXT: ret entry: %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0 - %conv1 = call double @llvm.experimental.constrained.sitofp.i64.f64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %conv1 = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %conv1 } @@ -143,7 +143,7 @@ define float @t2_strict(float %x) #0 { ; CHECK-NEXT: ret entry: %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0 - %conv1 = call 
float @llvm.experimental.constrained.sitofp.i32.f32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %conv1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %conv1 } @@ -155,7 +155,7 @@ define half @t3_strict(half %x) #0 { ; CHECK-NEXT: ret entry: %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0 - %conv1 = call half @llvm.experimental.constrained.sitofp.i32.f16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %conv1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret half %conv1 } @@ -167,7 +167,7 @@ define double @t4_strict(double %x) #0 { ; CHECK-NEXT: ret entry: %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0 - %conv1 = call double @llvm.experimental.constrained.uitofp.i64.f64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %conv1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret double %conv1 } @@ -179,7 +179,7 @@ define float @t5_strict(float %x) #0 { ; CHECK-NEXT: ret entry: %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0 - %conv1 = call float @llvm.experimental.constrained.uitofp.i32.f32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %conv1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret float %conv1 } @@ -191,7 +191,7 @@ define half @t6_strict(half %x) #0 { ; CHECK-NEXT: ret entry: %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0 - %conv1 = call half 
@llvm.experimental.constrained.uitofp.i32.f16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %conv1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret half %conv1 } @@ -216,7 +216,7 @@ define bfloat @t7_strict(bfloat %x) #0 { ; CHECK-NEXT: ret entry: %conv = call i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0 - %conv1 = call bfloat @llvm.experimental.constrained.sitofp.i32.bf16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %conv1 = call bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret bfloat %conv1 } @@ -241,7 +241,7 @@ define bfloat @t8_strict(bfloat %x) #0 { ; CHECK-NEXT: ret entry: %conv = call i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0 - %conv1 = call bfloat @llvm.experimental.constrained.uitofp.i32.bf16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %conv1 = call bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret bfloat %conv1 } @@ -255,11 +255,11 @@ declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) -declare bfloat @llvm.experimental.constrained.sitofp.i32.bf16(i32, metadata, metadata) -declare bfloat @llvm.experimental.constrained.uitofp.i32.bf16(i32, metadata, metadata) -declare half @llvm.experimental.constrained.sitofp.i32.f16(i32, metadata, metadata) -declare half @llvm.experimental.constrained.uitofp.i32.f16(i32, metadata, metadata) -declare float 
@llvm.experimental.constrained.sitofp.i32.f32(i32, metadata, metadata) -declare float @llvm.experimental.constrained.uitofp.i32.f32(i32, metadata, metadata) -declare double @llvm.experimental.constrained.sitofp.i64.f64(i64, metadata, metadata) -declare double @llvm.experimental.constrained.uitofp.i64.f64(i64, metadata, metadata) +declare bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32, metadata, metadata) +declare bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32, metadata, metadata) +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata) +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll index 3aeefab52c6fa33..4cce06dce44c9ba 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll @@ -595,7 +595,7 @@ define i32 @lrint_f16(half %x) #0 { ; CHECK-FP16-NEXT: frintx h0, h0 ; CHECK-FP16-NEXT: fcvtzs w0, h0 ; CHECK-FP16-NEXT: ret - %val = call i32 @llvm.experimental.constrained.lrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } @@ -612,7 +612,7 @@ define i64 @llrint_f16(half %x) #0 { ; CHECK-FP16-NEXT: frintx h0, h0 ; CHECK-FP16-NEXT: fcvtzs x0, h0 ; CHECK-FP16-NEXT: ret - %val = call i64 @llvm.experimental.constrained.llrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call 
i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } @@ -693,7 +693,7 @@ define i32 @lround_f16(half %x) #0 { ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtas w0, h0 ; CHECK-FP16-NEXT: ret - %val = call i32 @llvm.experimental.constrained.lround.f16(half %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict") #0 ret i32 %val } @@ -708,7 +708,7 @@ define i64 @llround_f16(half %x) #0 { ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtas x0, h0 ; CHECK-FP16-NEXT: ret - %val = call i64 @llvm.experimental.constrained.llround.f16(half %x, metadata !"fpexcept.strict") #0 + %val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict") #0 ret i64 %val } @@ -1277,14 +1277,14 @@ declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata) -declare i32 @llvm.experimental.constrained.lrint.f16(half, metadata, metadata) -declare i64 @llvm.experimental.constrained.llrint.f16(half, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f16(half, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f16(half, metadata, metadata) declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata) declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata) declare half @llvm.experimental.constrained.ceil.f16(half, metadata) declare half @llvm.experimental.constrained.floor.f16(half, metadata) -declare i32 @llvm.experimental.constrained.lround.f16(half, metadata) -declare i64 @llvm.experimental.constrained.llround.f16(half, metadata) +declare i32 
@llvm.experimental.constrained.lround.i32.f16(half, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f16(half, metadata) declare half @llvm.experimental.constrained.round.f16(half, metadata) declare half @llvm.experimental.constrained.roundeven.f16(half, metadata) declare half @llvm.experimental.constrained.trunc.f16(half, metadata) diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll index 1a9ba9fd4a5180c..6147afba4e603ae 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll @@ -279,7 +279,7 @@ define <4 x i1> @fcmps_v4f32(<4 x float> %x, <4 x float> %y) #0 { ; CHECK-NEXT: xtn v0.4h, v4.4s ; CHECK-NEXT: ret entry: - %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict") + %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict") ret <4 x i1> %val } @@ -825,8 +825,8 @@ declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, meta declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata) declare <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float>, metadata) declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata) -declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float>, <4 x float>, metadata, metadata) -declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float>, <4 x float>, metadata, metadata) +declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata) +declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float>, <4 x float>, metadata, metadata) declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) declare <2 x double> 
@llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll index 1664fa3ce56ae6b..fd3a0c3207606c2 100644 --- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll @@ -262,7 +262,7 @@ define float @nearbyint_f32(float %x) #0 { ; CHECK: frintx [[REG:s[0-9]+]], s0 ; CHECK: fcvtzs w0, [[REG]] define i32 @lrint_f32(float %x) #0 { - %val = call i32 @llvm.experimental.constrained.lrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } @@ -270,7 +270,7 @@ define i32 @lrint_f32(float %x) #0 { ; CHECK: frintx [[REG:s[0-9]+]], s0 ; CHECK: fcvtzs x0, [[REG]] define i64 @llrint_f32(float %x) #0 { - %val = call i64 @llvm.experimental.constrained.llrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } @@ -319,14 +319,14 @@ define float @floor_f32(float %x) #0 { ; CHECK-LABEL: lround_f32: ; CHECK: fcvtas w0, s0 define i32 @lround_f32(float %x) #0 { - %val = call i32 @llvm.experimental.constrained.lround.f32(float %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } ; CHECK-LABEL: llround_f32: ; CHECK: fcvtas x0, s0 define i64 @llround_f32(float %x) #0 { - %val = call i64 @llvm.experimental.constrained.llround.f32(float %x, metadata !"fpexcept.strict") #0 + %val = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict") #0 ret i64 %val } @@ -802,7 +802,7 @@ define double @nearbyint_f64(double %x) #0 { ; CHECK: frintx 
[[REG:d[0-9]+]], d0 ; CHECK: fcvtzs w0, [[REG]] define i32 @lrint_f64(double %x) #0 { - %val = call i32 @llvm.experimental.constrained.lrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } @@ -810,7 +810,7 @@ define i32 @lrint_f64(double %x) #0 { ; CHECK: frintx [[REG:d[0-9]+]], d0 ; CHECK: fcvtzs x0, [[REG]] define i64 @llrint_f64(double %x) #0 { - %val = call i64 @llvm.experimental.constrained.llrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } @@ -859,14 +859,14 @@ define double @floor_f64(double %x) #0 { ; CHECK-LABEL: lround_f64: ; CHECK: fcvtas w0, d0 define i32 @lround_f64(double %x) #0 { - %val = call i32 @llvm.experimental.constrained.lround.f64(double %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } ; CHECK-LABEL: llround_f64: ; CHECK: fcvtas x0, d0 define i64 @llround_f64(double %x) #0 { - %val = call i64 @llvm.experimental.constrained.llround.f64(double %x, metadata !"fpexcept.strict") #0 + %val = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict") #0 ret i64 %val } @@ -1341,14 +1341,14 @@ define fp128 @nearbyint_f128(fp128 %x) #0 { ; CHECK-LABEL: lrint_f128: ; CHECK: bl lrintl define i32 @lrint_f128(fp128 %x) #0 { - %val = call i32 @llvm.experimental.constrained.lrint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } ; CHECK-LABEL: llrint_f128: ; CHECK: bl llrintl define i64 
@llrint_f128(fp128 %x) #0 { - %val = call i64 @llvm.experimental.constrained.llrint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i64 %val } @@ -1383,14 +1383,14 @@ define fp128 @floor_f128(fp128 %x) #0 { ; CHECK-LABEL: lround_f128: ; CHECK: bl lroundl define i32 @lround_f128(fp128 %x) #0 { - %val = call i32 @llvm.experimental.constrained.lround.f128(fp128 %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i32 %val } ; CHECK-LABEL: llround_f128: ; CHECK: bl llroundl define i64 @llround_f128(fp128 %x) #0 { - %val = call i64 @llvm.experimental.constrained.llround.f128(fp128 %x, metadata !"fpexcept.strict") #0 + %val = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0 ret i64 %val } @@ -1795,16 +1795,16 @@ declare float @llvm.experimental.constrained.exp.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.exp2.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) -declare i32 @llvm.experimental.constrained.lrint.f32(float, metadata, metadata) -declare i64 @llvm.experimental.constrained.llrint.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f32(float, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata) declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata) declare float @llvm.experimental.constrained.maximum.f32(float, float, metadata) declare float @llvm.experimental.constrained.minimum.f32(float, 
float, metadata) declare float @llvm.experimental.constrained.ceil.f32(float, metadata) declare float @llvm.experimental.constrained.floor.f32(float, metadata) -declare i32 @llvm.experimental.constrained.lround.f32(float, metadata) -declare i64 @llvm.experimental.constrained.llround.f32(float, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata) declare float @llvm.experimental.constrained.round.f32(float, metadata) declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) declare float @llvm.experimental.constrained.trunc.f32(float, metadata) @@ -1847,16 +1847,16 @@ declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) -declare i32 @llvm.experimental.constrained.lrint.f64(double, metadata, metadata) -declare i64 @llvm.experimental.constrained.llrint.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata) declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata) declare double @llvm.experimental.constrained.maximum.f64(double, double, metadata) declare double @llvm.experimental.constrained.minimum.f64(double, double, metadata) declare double @llvm.experimental.constrained.ceil.f64(double, metadata) declare double @llvm.experimental.constrained.floor.f64(double, metadata) -declare i32 @llvm.experimental.constrained.lround.f64(double, metadata) -declare i64 @llvm.experimental.constrained.llround.f64(double, 
metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata) declare double @llvm.experimental.constrained.round.f64(double, metadata) declare double @llvm.experimental.constrained.roundeven.f64(double, metadata) declare double @llvm.experimental.constrained.trunc.f64(double, metadata) @@ -1899,14 +1899,14 @@ declare fp128 @llvm.experimental.constrained.exp.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.exp2.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata) -declare i32 @llvm.experimental.constrained.lrint.f128(fp128, metadata, metadata) -declare i64 @llvm.experimental.constrained.llrint.f128(fp128, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata) declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata) declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata) declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata) declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata) -declare i32 @llvm.experimental.constrained.lround.f128(fp128, metadata) -declare i64 @llvm.experimental.constrained.llround.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.llround.i64.f128(fp128, metadata) declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata) declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata) declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata) diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll 
b/llvm/test/CodeGen/ARM/fp-intrinsics.ll index e286eb3226e46f8..ca2dc701bd1fb37 100644 --- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -68,7 +68,7 @@ define float @fma_f32(float %x, float %y, float %z) #0 { ; CHECK-NOSP: bl __aeabi_f2iz ; CHECK-SP: vcvt.s32.f32 define i32 @fptosi_f32(float %x) #0 { - %val = call i32 @llvm.experimental.constrained.fptosi.f32(float %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } @@ -79,9 +79,9 @@ define i32 @fptosi_f32(float %x) #0 { ; FIXME-CHECK-SP: vcvt.s32.f32 define void @fptosi_f32_twice(float %arg, ptr %ptr) #0 { entry: - %conv = call i32 @llvm.experimental.constrained.fptosi.f32(float %arg, metadata !"fpexcept.strict") #0 + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %arg, metadata !"fpexcept.strict") #0 store i32 %conv, ptr %ptr, align 4 - %conv1 = call i32 @llvm.experimental.constrained.fptosi.f32(float %arg, metadata !"fpexcept.strict") #0 + %conv1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %arg, metadata !"fpexcept.strict") #0 %idx = getelementptr inbounds i32, ptr %ptr, i32 1 store i32 %conv1, ptr %idx, align 4 ret void @@ -91,7 +91,7 @@ entry: ; CHECK-NOSP: bl __aeabi_f2uiz ; FIXME-CHECK-SP: vcvt.u32.f32 define i32 @fptoui_f32(float %x) #0 { - %val = call i32 @llvm.experimental.constrained.fptoui.f32(float %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } @@ -102,9 +102,9 @@ define i32 @fptoui_f32(float %x) #0 { ; FIXME-CHECK-SP: vcvt.u32.f32 define void @fptoui_f32_twice(float %arg, ptr %ptr) #0 { entry: - %conv = call i32 @llvm.experimental.constrained.fptoui.f32(float %arg, metadata !"fpexcept.strict") #0 + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %arg, metadata !"fpexcept.strict") #0 
store i32 %conv, ptr %ptr, align 4 - %conv1 = call i32 @llvm.experimental.constrained.fptoui.f32(float %arg, metadata !"fpexcept.strict") #0 + %conv1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %arg, metadata !"fpexcept.strict") #0 %idx = getelementptr inbounds i32, ptr %ptr, i32 1 store i32 %conv1, ptr %idx, align 4 ret void @@ -209,14 +209,14 @@ define float @nearbyint_f32(float %x) #0 { ; CHECK-LABEL: lrint_f32: ; CHECK: bl lrintf define i32 @lrint_f32(float %x) #0 { - %val = call i32 @llvm.experimental.constrained.lrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } ; CHECK-LABEL: llrint_f32: ; CHECK: bl llrintf define i32 @llrint_f32(float %x) #0 { - %val = call i32 @llvm.experimental.constrained.llrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.llrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } @@ -259,14 +259,14 @@ define float @floor_f32(float %x) #0 { ; CHECK-LABEL: lround_f32: ; CHECK: bl lroundf define i32 @lround_f32(float %x) #0 { - %val = call i32 @llvm.experimental.constrained.lround.f32(float %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } ; CHECK-LABEL: llround_f32: ; CHECK: bl llroundf define i32 @llround_f32(float %x) #0 { - %val = call i32 @llvm.experimental.constrained.llround.f32(float %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.llround.i32.f32(float %x, metadata !"fpexcept.strict") #0 ret i32 %val } @@ -562,7 +562,7 @@ define double @fma_f64(double %x, double %y, double %z) #0 { ; CHECK-NODP: bl __aeabi_d2iz ; CHECK-DP: vcvt.s32.f64 define i32 @fptosi_f64(double %x) #0 { - 
%val = call i32 @llvm.experimental.constrained.fptosi.f64(double %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } @@ -570,7 +570,7 @@ define i32 @fptosi_f64(double %x) #0 { ; CHECK-NODP: bl __aeabi_d2uiz ; FIXME-CHECK-DP: vcvt.u32.f64 define i32 @fptoui_f64(double %x) #0 { - %val = call i32 @llvm.experimental.constrained.fptoui.f64(double %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } @@ -673,14 +673,14 @@ define double @nearbyint_f64(double %x) #0 { ; CHECK-LABEL: lrint_f64: ; CHECK: bl lrint define i32 @lrint_f64(double %x) #0 { - %val = call i32 @llvm.experimental.constrained.lrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } ; CHECK-LABEL: llrint_f64: ; CHECK: bl llrint define i32 @llrint_f64(double %x) #0 { - %val = call i32 @llvm.experimental.constrained.llrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.llrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ret i32 %val } @@ -723,14 +723,14 @@ define double @floor_f64(double %x) #0 { ; CHECK-LABEL: lround_f64: ; CHECK: bl lround define i32 @lround_f64(double %x) #0 { - %val = call i32 @llvm.experimental.constrained.lround.f64(double %x, metadata !"fpexcept.strict") #0 + %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } ; CHECK-LABEL: llround_f64: ; CHECK: bl llround define i32 @llround_f64(double %x) #0 { - %val = call i32 @llvm.experimental.constrained.llround.f64(double %x, metadata !"fpexcept.strict") #0 + %val = call i32 
@llvm.experimental.constrained.llround.i32.f64(double %x, metadata !"fpexcept.strict") #0 ret i32 %val } @@ -1031,8 +1031,8 @@ declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, me declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) -declare i32 @llvm.experimental.constrained.fptosi.f32(float, metadata) -declare i32 @llvm.experimental.constrained.fptoui.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) @@ -1046,14 +1046,14 @@ declare float @llvm.experimental.constrained.exp.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.exp2.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) -declare i32 @llvm.experimental.constrained.lrint.f32(float, metadata, metadata) -declare i32 @llvm.experimental.constrained.llrint.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f32(float, metadata, metadata) +declare i32 @llvm.experimental.constrained.llrint.i32.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata) declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata) declare float @llvm.experimental.constrained.ceil.f32(float, metadata) declare float @llvm.experimental.constrained.floor.f32(float, 
metadata) -declare i32 @llvm.experimental.constrained.lround.f32(float, metadata) -declare i32 @llvm.experimental.constrained.llround.f32(float, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata) +declare i32 @llvm.experimental.constrained.llround.i32.f32(float, metadata) declare float @llvm.experimental.constrained.round.f32(float, metadata) declare float @llvm.experimental.constrained.trunc.f32(float, metadata) declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) @@ -1065,8 +1065,8 @@ declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) -declare i32 @llvm.experimental.constrained.fptosi.f64(double, metadata) -declare i32 @llvm.experimental.constrained.fptoui.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) @@ -1080,14 +1080,14 @@ declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) -declare i32 @llvm.experimental.constrained.lrint.f64(double, metadata, metadata) -declare i32 @llvm.experimental.constrained.llrint.f64(double, metadata, metadata) 
+declare i32 @llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata) +declare i32 @llvm.experimental.constrained.llrint.i32.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata) declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata) declare double @llvm.experimental.constrained.ceil.f64(double, metadata) declare double @llvm.experimental.constrained.floor.f64(double, metadata) -declare i32 @llvm.experimental.constrained.lround.f64(double, metadata) -declare i32 @llvm.experimental.constrained.llround.f64(double, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata) +declare i32 @llvm.experimental.constrained.llround.i32.f64(double, metadata) declare double @llvm.experimental.constrained.round.f64(double, metadata) declare double @llvm.experimental.constrained.trunc.f64(double, metadata) declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index 76f3dea5b7751d4..3e8935e7d5977bf 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1083,7 +1083,7 @@ define float @test_fptrunc_ppc_fp128_f32(ppc_fp128 %first) #0 { ; PC64-NEXT: frsp 1, 1 ; PC64-NEXT: blr entry: - %fptrunc = call float @llvm.experimental.constrained.fptrunc.ppcf128.f32( + %fptrunc = call float @llvm.experimental.constrained.fptrunc.f32.ppcf128( ppc_fp128 %first, metadata !"round.dynamic", metadata !"fpexcept.strict") #1 @@ -1103,7 +1103,7 @@ define double @test_fptrunc_ppc_fp128_f64(ppc_fp128 %first) #0 { ; PC64: # %bb.0: # %entry ; PC64-NEXT: blr entry: - %fptrunc = call double @llvm.experimental.constrained.fptrunc.ppcf128.f64( + %fptrunc = call double 
@llvm.experimental.constrained.fptrunc.f64.ppcf128( ppc_fp128 %first, metadata !"round.dynamic", metadata !"fpexcept.strict") #1 @@ -1127,7 +1127,7 @@ define ppc_fp128 @test_fpext_ppc_fp128_f32(float %first) #0 { ; PC64-NEXT: lfs 2, .LCPI26_0@toc@l(3) ; PC64-NEXT: blr entry: - %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128( + %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f32( float %first, metadata !"fpexcept.strict") #1 ret ppc_fp128 %fpext @@ -1150,7 +1150,7 @@ define ppc_fp128 @test_fpext_ppc_fp128_f64(double %first) #0 { ; PC64-NEXT: lfs 2, .LCPI27_0@toc@l(3) ; PC64-NEXT: blr entry: - %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.f64.ppcf128( + %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f64( double %first, metadata !"fpexcept.strict") #1 ret ppc_fp128 %fpext @@ -1568,7 +1568,7 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0 ; PC64-NEXT: mtlr 0 ; PC64-NEXT: blr %load = load float, ptr %firstptr - %first = call ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128( + %first = call ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f32( float %load, metadata !"fpexcept.strict") #1 store ppc_fp128 %first, ptr %result @@ -1598,7 +1598,7 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0 i32 2, metadata !"round.dynamic", metadata !"fpexcept.strict") #1 - %tinypow = call float @llvm.experimental.constrained.fptrunc.ppcf128.f32( + %tinypow = call float @llvm.experimental.constrained.fptrunc.f32.ppcf128( ppc_fp128 %powi, metadata !"round.dynamic", metadata !"fpexcept.strict") #1 @@ -2015,7 +2015,7 @@ define i1 @ppcq_to_s1(ppc_fp128 %a) { ; PC64-NEXT: mtlr 0 ; PC64-NEXT: blr entry: - %conv = tail call i1 @llvm.experimental.constrained.fptosi.ppcf128.i1(ppc_fp128 %a, metadata !"fpexcept.strict") #1 + %conv = tail call i1 @llvm.experimental.constrained.fptosi.i1.ppcf128(ppc_fp128 %a, metadata 
!"fpexcept.strict") #1 ret i1 %conv } @@ -2062,7 +2062,7 @@ define i1 @ppcq_to_u1(ppc_fp128 %a) { ; PC64-NEXT: mtlr 0 ; PC64-NEXT: blr entry: - %conv = tail call i1 @llvm.experimental.constrained.fptoui.ppcf128.i1(ppc_fp128 %a, metadata !"fpexcept.strict") #1 + %conv = tail call i1 @llvm.experimental.constrained.fptoui.i1.ppcf128(ppc_fp128 %a, metadata !"fpexcept.strict") #1 ret i1 %conv } @@ -2121,10 +2121,10 @@ declare ppc_fp128 @llvm.experimental.constrained.exp.ppcf128(ppc_fp128, metadata declare ppc_fp128 @llvm.experimental.constrained.exp2.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.floor.ppcf128(ppc_fp128, metadata) declare ppc_fp128 @llvm.experimental.constrained.fma.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128, metadata, metadata) -declare ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128(float, metadata) -declare ppc_fp128 @llvm.experimental.constrained.fpext.f64.ppcf128(double, metadata) -declare float @llvm.experimental.constrained.fptrunc.ppcf128.f32(ppc_fp128, metadata, metadata) -declare double @llvm.experimental.constrained.fptrunc.ppcf128.f64(ppc_fp128, metadata, metadata) +declare ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f32(float, metadata) +declare ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f64(double, metadata) +declare float @llvm.experimental.constrained.fptrunc.f32.ppcf128(ppc_fp128, metadata, metadata) +declare double @llvm.experimental.constrained.fptrunc.f64.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.log.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.log10.ppcf128(ppc_fp128, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.log2.ppcf128(ppc_fp128, metadata, metadata) @@ -2144,10 +2144,10 @@ declare ppc_fp128 @llvm.experimental.constrained.tan.ppcf128(ppc_fp128, metadata declare ppc_fp128 @llvm.experimental.constrained.trunc.ppcf128(ppc_fp128, metadata) declare 
i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata) -declare i1 @llvm.experimental.constrained.fptosi.ppcf128.i1(ppc_fp128, metadata) +declare i1 @llvm.experimental.constrained.fptosi.i1.ppcf128(ppc_fp128, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128, metadata) -declare i1 @llvm.experimental.constrained.fptoui.ppcf128.i1(ppc_fp128, metadata) +declare i1 @llvm.experimental.constrained.fptoui.i1.ppcf128(ppc_fp128, metadata) declare ppc_fp128 @llvm.experimental.constrained.sitofp.ppcf128.i32(i32, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.uitofp.ppcf128.i32(i32, metadata, metadata) declare ppc_fp128 @llvm.experimental.constrained.sitofp.ppcf128.i64(i64, metadata, metadata) diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll index 4cb6191e7322e9f..7e5ea173e52295b 100644 --- a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll @@ -57,7 +57,7 @@ define double @sqrt_f64(double %a) nounwind strictfp { ret double %1 } -declare double @llvm.experimental.constrained.powi.f64.i32(double, i32, metadata, metadata) +declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata) define double @powi_f64(double %a, i32 %b) nounwind strictfp { ; RV32IFD-LABEL: powi_f64: @@ -116,7 +116,7 @@ define double @powi_f64(double %a, i32 %b) nounwind strictfp { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret - %1 = call double @llvm.experimental.constrained.powi.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + %1 = call double @llvm.experimental.constrained.powi.f64(double %a, i32 %b, metadata 
!"round.dynamic", metadata !"fpexcept.strict") strictfp ret double %1 } diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll index cbd84634de11c0b..7b2d38fefaacb1b 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll @@ -52,7 +52,7 @@ define float @sqrt_f32(float %a) nounwind strictfp { ret float %1 } -declare float @llvm.experimental.constrained.powi.f32.i32(float, i32, metadata, metadata) +declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) define float @powi_f32(float %a, i32 %b) nounwind strictfp { ; RV32IF-LABEL: powi_f32: @@ -111,7 +111,7 @@ define float @powi_f32(float %a, i32 %b) nounwind strictfp { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret - %1 = call float @llvm.experimental.constrained.powi.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + %1 = call float @llvm.experimental.constrained.powi.f32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp ret float %1 } diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 6700920cebff0a8..23ebfade6f6b0f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -357,11 +357,11 @@ define <vscale x 2 x float> @vpmerge_constrained_fadd(<vscale x 2 x float> %pass ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret - %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl) strictfp ret <vscale x 2 x float> %b } 
-declare <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata) +declare <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata) declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i64) ; This shouldn't be folded because we need to preserve exceptions with @@ -374,7 +374,7 @@ define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float> ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret - %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1) strictfp ret <vscale x 2 x float> %b } diff --git a/llvm/test/CodeGen/X86/bfloat-constrained.ll b/llvm/test/CodeGen/X86/bfloat-constrained.ll index 0a8c4f20648b059..081b1cebfc43d62 100644 --- a/llvm/test/CodeGen/X86/bfloat-constrained.ll +++ b/llvm/test/CodeGen/X86/bfloat-constrained.ll @@ -86,7 +86,7 @@ define void @float_to_bfloat(float %0) strictfp { ; X64-NEXT: popq %rax ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq - %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 store bfloat %2, ptr @a, align 2 ret void } @@ -115,7 +115,7 @@ define void @double_to_bfloat(double %0) strictfp { ; X64-NEXT: popq %rax ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq - %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 store bfloat %2, ptr @a, align 2 ret void } @@ 
-162,20 +162,20 @@ define void @add() strictfp { ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq %1 = load bfloat, ptr @a, align 2 - %2 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %1, metadata !"fpexcept.strict") #0 + %2 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %1, metadata !"fpexcept.strict") #0 %3 = load bfloat, ptr @b, align 2 - %4 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %3, metadata !"fpexcept.strict") #0 + %4 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %3, metadata !"fpexcept.strict") #0 %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 - %6 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %6 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 store bfloat %6, ptr @c, align 2 ret void } -declare float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat, metadata) -declare double @llvm.experimental.constrained.fpext.f64.bfloat(bfloat, metadata) +declare float @llvm.experimental.constrained.fpext.f32.bf16(bfloat, metadata) +declare double @llvm.experimental.constrained.fpext.f64.bf16(bfloat, metadata) declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) -declare bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float, metadata, metadata) -declare bfloat @llvm.experimental.constrained.fptrunc.bfloat.f64(double, metadata, metadata) +declare bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float, metadata, metadata) +declare bfloat @llvm.experimental.constrained.fptrunc.bf16.f64(double, metadata, metadata) attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/float-strict-powi-convert.ll 
b/llvm/test/CodeGen/X86/float-strict-powi-convert.ll index 4d0cffc53d93af7..b39f5ec667cecda 100644 --- a/llvm/test/CodeGen/X86/float-strict-powi-convert.ll +++ b/llvm/test/CodeGen/X86/float-strict-powi-convert.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=x86_64-pc-windows-msvc %s -o - | FileCheck %s -check-prefix=WIN ; RUN: llc -mtriple=x86_64-pc-linux %s -o -| FileCheck %s -check-prefix=UNIX -declare float @llvm.experimental.constrained.powi.f32.i32(float, i32, metadata, metadata) +declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata) define float @powi_f64(float %a, i32 %b) nounwind strictfp { ; WIN-LABEL: powi_f64: @@ -19,6 +19,6 @@ define float @powi_f64(float %a, i32 %b) nounwind strictfp { ; UNIX-NEXT: callq __powisf2@PLT ; UNIX-NEXT: popq %rax ; UNIX-NEXT: retq - %1 = call float @llvm.experimental.constrained.powi.f32.i32(float %a, i32 %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") strictfp + %1 = call float @llvm.experimental.constrained.powi.f32(float %a, i32 %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") strictfp ret float %1 } diff --git a/llvm/test/Transforms/EarlyCSE/defaultfp-strictfp.ll b/llvm/test/Transforms/EarlyCSE/defaultfp-strictfp.ll index bdfad0d6e44e9c4..3871822c9dc17a5 100644 --- a/llvm/test/Transforms/EarlyCSE/defaultfp-strictfp.ll +++ b/llvm/test/Transforms/EarlyCSE/defaultfp-strictfp.ll @@ -246,8 +246,8 @@ define i1 @multiple_fcmp(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP1]] ; - %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 - %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 
@llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -262,9 +262,9 @@ define i1 @multiple_fcmp_split(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP1]] ; - %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 call void @arbitraryfunc() #0 - %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -278,8 +278,8 @@ define i1 @multiple_fcmps(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP1]] ; - %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 - %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -294,9 +294,9 @@ define i1 @multiple_fcmps_split(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] ; CHECK-NEXT: ret 
i1 [[TMP1]] ; - %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 call void @arbitraryfunc() #0 - %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -318,5 +318,5 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) -declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata) -declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) diff --git a/llvm/test/Transforms/EarlyCSE/ebstrict-strictfp.ll b/llvm/test/Transforms/EarlyCSE/ebstrict-strictfp.ll index fafc7ccbb38c1f7..f2675ce7816a4e4 100644 --- a/llvm/test/Transforms/EarlyCSE/ebstrict-strictfp.ll +++ b/llvm/test/Transforms/EarlyCSE/ebstrict-strictfp.ll @@ -132,8 +132,8 @@ define i1 @fcmp_strict(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP2]] ; - %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 - %2 = call i1 
@llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -149,8 +149,8 @@ define i1 @fcmps_strict(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP2]] ; - %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 - %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -172,5 +172,5 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) -declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata) -declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) diff --git 
a/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll b/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll index f3b857ab2f4874c..b79f7018b8d0d55 100644 --- a/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll +++ b/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll @@ -339,8 +339,8 @@ define i1 @mixed_fcmp_maytrap(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP2]] ; - %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 - %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -356,8 +356,8 @@ define i1 @mixed_fcmp_strict(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP2]] ; - %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 - %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -373,8 +373,8 @@ define i1 @mixed_fcmps_maytrap(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) 
#[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP2]] ; - %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 - %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -390,8 +390,8 @@ define i1 @mixed_fcmps_strict(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP2]] ; - %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 - %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -413,5 +413,5 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) -declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata) -declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata) +declare i1 
@llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) diff --git a/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll b/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll index 8772f208ebe47ec..3acf5597dfc3fe6 100644 --- a/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll +++ b/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll @@ -313,8 +313,8 @@ define i1 @fcmp_defaultenv(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP1]] ; - %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 - %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -328,8 +328,8 @@ define i1 @fcmp_maytrap(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP1]] ; - %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 - %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 + %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 + %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 
@bar.i32(i32 %3, i32 %4) #0 @@ -343,8 +343,8 @@ define i1 @fcmps_defaultenv(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP1]] ; - %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 - %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 + %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -358,8 +358,8 @@ define i1 @fcmps_maytrap(double %a, double %b) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]] ; CHECK-NEXT: ret i1 [[TMP1]] ; - %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 - %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 + %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 + %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0 %3 = zext i1 %1 to i32 %4 = zext i1 %2 to i32 %5 = call i32 @bar.i32(i32 %3, i32 %4) #0 @@ -381,5 +381,5 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) -declare i1 
@llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata) -declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) diff --git a/llvm/test/Transforms/SCCP/strictfp-phis-fcmp.ll b/llvm/test/Transforms/SCCP/strictfp-phis-fcmp.ll index 3bf7d9578b560a2..a6c023a25608b42 100644 --- a/llvm/test/Transforms/SCCP/strictfp-phis-fcmp.ll +++ b/llvm/test/Transforms/SCCP/strictfp-phis-fcmp.ll @@ -19,7 +19,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 ret i1 %c } @@ -42,7 +42,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 ret i1 %c } @@ -66,7 +66,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 ret i1 %c } @@ -91,7 +91,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 2.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 ret i1 %c } 
@@ -115,7 +115,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 2.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 ret i1 %c } @@ -139,7 +139,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 2.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 ret i1 %c } @@ -163,7 +163,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ %f, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 ret i1 %c } @@ -187,7 +187,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ %f, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 ret i1 %c } @@ -211,7 +211,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ %f, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 ret i1 %c } @@ -236,7 +236,7 @@ dead: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.ignore") #0 + %c = call i1 
@llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.ignore") #0 ret i1 %c } @@ -261,7 +261,7 @@ dead: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.maytrap") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.maytrap") #0 ret i1 %c } @@ -288,11 +288,11 @@ dead: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ] - %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.strict") #0 + %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.strict") #0 ret i1 %c } attributes #0 = { strictfp } -declare i1 @llvm.experimental.constrained.fcmp.i1.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) diff --git a/llvm/test/Transforms/SCCP/strictfp-phis-fcmps.ll b/llvm/test/Transforms/SCCP/strictfp-phis-fcmps.ll index 6db1f47ccca9977..213293a78593851 100644 --- a/llvm/test/Transforms/SCCP/strictfp-phis-fcmps.ll +++ b/llvm/test/Transforms/SCCP/strictfp-phis-fcmps.ll @@ -19,7 +19,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 ret i1 %c } @@ -42,7 +42,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata 
!"fpexcept.maytrap") #0 ret i1 %c } @@ -66,7 +66,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 ret i1 %c } @@ -91,7 +91,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 2.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 ret i1 %c } @@ -115,7 +115,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 2.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 ret i1 %c } @@ -139,7 +139,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ 2.0, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 ret i1 %c } @@ -163,7 +163,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ %f, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0 ret i1 %c } @@ -187,7 +187,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ %f, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 + %c 
= call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0 ret i1 %c } @@ -211,7 +211,7 @@ if.true: end: %p = phi float [ 1.0, %entry ], [ %f, %if.true] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0 ret i1 %c } @@ -236,7 +236,7 @@ dead: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.ignore") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.ignore") #0 ret i1 %c } @@ -261,7 +261,7 @@ dead: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.maytrap") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.maytrap") #0 ret i1 %c } @@ -288,11 +288,11 @@ dead: end: %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ] - %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.strict") #0 + %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.strict") #0 ret i1 %c } attributes #0 = { strictfp } -declare i1 @llvm.experimental.constrained.fcmps.i1.f32(float, float, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata) From 5155c38ceef019f3225c5eaeba6b986e2430752f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 28 Oct 2024 15:09:20 +0800 Subject: [PATCH 134/425] [InstCombine] Don't check uses of constant exprs (#113684) This 
patch skips constant expressions to avoid iterating over uses on other functions. Fix crash reported in https://github.com/llvm/llvm-project/pull/105510#issuecomment-2437521147. --- .../InstCombine/InstructionCombining.cpp | 4 +- llvm/test/Transforms/InstCombine/pr105510.ll | 43 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/InstCombine/pr105510.ll diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 971ace2a4f4716f..73a4705531781ab 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3753,7 +3753,9 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) { } // Replace all dominated uses of the condition with true/false - if (BI.getSuccessor(0) != BI.getSuccessor(1)) { + // Ignore constant expressions to avoid iterating over uses on other + // functions. + if (!isa(Cond) && BI.getSuccessor(0) != BI.getSuccessor(1)) { for (auto &U : make_early_inc_range(Cond->uses())) { BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(0)); if (DT.dominates(Edge0, U)) { diff --git a/llvm/test/Transforms/InstCombine/pr105510.ll b/llvm/test/Transforms/InstCombine/pr105510.ll new file mode 100644 index 000000000000000..844fa14ad991ee9 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr105510.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +; Make sure we don't crash in this case. 
+@g = global i32 0 + +define i1 @foo() { +; CHECK-LABEL: define i1 @foo() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 ptrtoint (ptr @g to i1), label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: ret i1 true +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: ret i1 false +; +entry: + br i1 ptrtoint (ptr @g to i1), label %if.then, label %if.else + +if.then: + ret i1 true + +if.else: + ret i1 false +} + +define i1 @bar() { +; CHECK-LABEL: define i1 @bar() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 ptrtoint (ptr @g to i1), label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: ret i1 true +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: ret i1 false +; +entry: + br i1 ptrtoint (ptr @g to i1), label %if.then, label %if.else + +if.then: + ret i1 true + +if.else: + ret i1 false +} From fd85761208b4fe0569e3fde0d11297f7dcd0e48e Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Mon, 28 Oct 2024 15:15:49 +0800 Subject: [PATCH 135/425] [X86][BF16] Customize VSELECT for BF16 under AVX-NECONVERT (#113322) Fixes: https://godbolt.org/z/9abGnE8zs --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 + .../CodeGen/X86/avxneconvert-intrinsics.ll | 114 ++++++++++++++++++ 2 files changed, 116 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9d447959faf55a9..1c790f3813b7a47 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2393,6 +2393,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom); for (auto VT : {MVT::v8bf16, MVT::v16bf16}) { setF16Action(VT, Expand); + if (!Subtarget.hasBF16()) + setOperationAction(ISD::VSELECT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); diff --git 
a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll index b311c8831457b80..ef87ac31fcf48cb 100644 --- a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll @@ -215,3 +215,117 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) { } declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A) +define <8 x bfloat> @select(i8 %x, <8 x bfloat> %y) nounwind { +; X64-LABEL: select: +; X64: # %bb.0: +; X64-NEXT: vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8] +; X64-NEXT: movb %dil, %al # encoding: [0x40,0x88,0xf8] +; X64-NEXT: movb %al, -{{[0-9]+}}(%rsp) # encoding: [0x88,0x44,0x24,0xff] +; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0xff] +; X64-NEXT: movl %eax, %ecx # encoding: [0x89,0xc1] +; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X64-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] +; X64-NEXT: # kill: def $al killed $al killed $eax +; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X64-NEXT: shrb %cl # encoding: [0xd0,0xe9] +; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X64-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01] +; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X64-NEXT: shrb $2, %cl # encoding: [0xc0,0xe9,0x02] +; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X64-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02] +; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X64-NEXT: shrb $3, %cl # encoding: [0xc0,0xe9,0x03] +; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; 
X64-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X64-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03] +; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X64-NEXT: shrb $4, %cl # encoding: [0xc0,0xe9,0x04] +; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X64-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04] +; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X64-NEXT: shrb $5, %cl # encoding: [0xc0,0xe9,0x05] +; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X64-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05] +; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X64-NEXT: shrb $6, %cl # encoding: [0xc0,0xe9,0x06] +; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X64-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06] +; X64-NEXT: shrb $7, %al # encoding: [0xc0,0xe8,0x07] +; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; X64-NEXT: negl %eax # encoding: [0xf7,0xd8] +; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] +; X64-NEXT: vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1] +; X64-NEXT: retq # encoding: [0xc3] +; +; X86-LABEL: select: +; X86: # %bb.0: +; X86-NEXT: pushl %eax # encoding: [0x50] +; X86-NEXT: vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x08] +; X86-NEXT: movb %al, {{[0-9]+}}(%esp) # encoding: [0x88,0x44,0x24,0x03] +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x03] +; X86-NEXT: movl %eax, %ecx # encoding: [0x89,0xc1] +; X86-NEXT: andl $1, %ecx # 
encoding: [0x83,0xe1,0x01] +; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X86-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] +; X86-NEXT: # kill: def $al killed $al killed $eax +; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X86-NEXT: shrb %cl # encoding: [0xd0,0xe9] +; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01] +; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X86-NEXT: shrb $2, %cl # encoding: [0xc0,0xe9,0x02] +; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02] +; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X86-NEXT: shrb $3, %cl # encoding: [0xc0,0xe9,0x03] +; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X86-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03] +; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X86-NEXT: shrb $4, %cl # encoding: [0xc0,0xe9,0x04] +; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04] +; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; X86-NEXT: shrb $5, %cl # encoding: [0xc0,0xe9,0x05] +; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05] +; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1] +; 
X86-NEXT: shrb $6, %cl # encoding: [0xc0,0xe9,0x06] +; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9] +; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01] +; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9] +; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06] +; X86-NEXT: shrb $7, %al # encoding: [0xc0,0xe8,0x07] +; X86-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0] +; X86-NEXT: negl %eax # encoding: [0xf7,0xd8] +; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] +; X86-NEXT: vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1] +; X86-NEXT: popl %eax # encoding: [0x58] +; X86-NEXT: retl # encoding: [0xc3] + %1 = bitcast i8 %x to <8 x i1> + %2 = select <8 x i1> %1, <8 x bfloat> zeroinitializer, <8 x bfloat> %y + ret <8 x bfloat> %2 +} From e7f422d5c2ea05704eaab2cdd67a8b1ebc55e95b Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 28 Oct 2024 08:38:56 +0100 Subject: [PATCH 136/425] Add clang::lifetimebound annotation to ArrayRef constructors. (#113547) This enables clang to detect more dangling issues. ``` ArrayRef func() { constexpr int array[] = {...}; // oops, missing the static return array; // return a dangling reference, bomb. } ``` See #113533. --- llvm/include/llvm/ADT/ArrayRef.h | 15 +++++++++------ llvm/include/llvm/Support/Compiler.h | 6 ++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h index bf6b55923b84ba3..1139fd81cbd07f2 100644 --- a/llvm/include/llvm/ADT/ArrayRef.h +++ b/llvm/include/llvm/ADT/ArrayRef.h @@ -70,15 +70,16 @@ namespace llvm { /*implicit*/ ArrayRef(std::nullopt_t) {} /// Construct an ArrayRef from a single element. - /*implicit*/ ArrayRef(const T &OneElt) - : Data(&OneElt), Length(1) {} + /*implicit*/ ArrayRef(const T &OneElt LLVM_LIFETIME_BOUND) + : Data(&OneElt), Length(1) {} /// Construct an ArrayRef from a pointer and length. 
- constexpr /*implicit*/ ArrayRef(const T *data, size_t length) + constexpr /*implicit*/ ArrayRef(const T *data LLVM_LIFETIME_BOUND, + size_t length) : Data(data), Length(length) {} /// Construct an ArrayRef from a range. - constexpr ArrayRef(const T *begin, const T *end) + constexpr ArrayRef(const T *begin LLVM_LIFETIME_BOUND, const T *end) : Data(begin), Length(end - begin) { assert(begin <= end); } @@ -103,7 +104,8 @@ namespace llvm { /// Construct an ArrayRef from a C array. template - /*implicit*/ constexpr ArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {} + /*implicit*/ constexpr ArrayRef(const T (&Arr LLVM_LIFETIME_BOUND)[N]) + : Data(Arr), Length(N) {} /// Construct an ArrayRef from a std::initializer_list. #if LLVM_GNUC_PREREQ(9, 0, 0) @@ -113,7 +115,8 @@ namespace llvm { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Winit-list-lifetime" #endif - constexpr /*implicit*/ ArrayRef(std::initializer_list Vec) + constexpr /*implicit*/ ArrayRef( + std::initializer_list Vec LLVM_LIFETIME_BOUND) : Data(Vec.begin() == Vec.end() ? 
(T *)nullptr : Vec.begin()), Length(Vec.size()) {} #if LLVM_GNUC_PREREQ(9, 0, 0) diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index 591e7647795bb21..f9c57b89f1f033a 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -413,6 +413,12 @@ #define LLVM_GSL_POINTER #endif +#if LLVM_HAS_CPP_ATTRIBUTE(clang::lifetimebound) +#define LLVM_LIFETIME_BOUND [[clang::lifetimebound]] +#else +#define LLVM_LIFETIME_BOUND +#endif + #if LLVM_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L #define LLVM_CTOR_NODISCARD [[nodiscard]] #else From 35f6cc6af09f48f9038fce632815a2ad6ffe8689 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Mon, 28 Oct 2024 07:42:33 +0000 Subject: [PATCH 137/425] [RISCV] Add the Sha extension (#113820) This was introduced in the now-ratified RVA23 profile (and also added to the RVA22 text) as a simple way of referring to H plus the set of supervisor extensions required by RVA23. https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc This patch simply defines the extension. The next patch will adjust the RVA23 profile to use it, and at that point I think we will be ready to mark RVA23 as non-experimental. Note that I haven't made it so if you enable all extensions that constitute Sha, Sha is implied. Per #76893 (adding 'B'), the concern is making this implication might break older external assemblers. Perhaps this is less of a concern given the relative frequency of `-march=${foo}_zba_zbb_zbs` vs the collection of H extensions. If we did want to add that implication, we'd probably want to add it in a separate patch so it can be easily reverted if found to cause problems. 
--- clang/test/Driver/print-supported-extensions-riscv.c | 1 + clang/test/Preprocessor/riscv-target-features.c | 9 +++++++++ llvm/docs/RISCVUsage.rst | 1 + llvm/docs/ReleaseNotes.md | 1 + llvm/lib/Target/RISCV/RISCVFeatures.td | 7 +++++++ llvm/test/CodeGen/RISCV/attributes.ll | 4 ++++ llvm/unittests/TargetParser/RISCVISAInfoTest.cpp | 1 + 7 files changed, 24 insertions(+) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index 342d6e921a5a83e..fc8a9c04667b6ba 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -110,6 +110,7 @@ // CHECK-NEXT: zvl8192b 1.0 'Zvl' (Minimum Vector Length) 8192 // CHECK-NEXT: zhinx 1.0 'Zhinx' (Half Float in Integer) // CHECK-NEXT: zhinxmin 1.0 'Zhinxmin' (Half Float in Integer Minimal) +// CHECK-NEXT: sha 1.0 'Sha' (Augmented Hypervisor) // CHECK-NEXT: shcounterenw 1.0 'Shcounterenw' (Support writeable hcounteren enable bit for any hpmcounter that is not read-only zero) // CHECK-NEXT: shgatpa 1.0 'Sgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare) // CHECK-NEXT: shtvala 1.0 'Shtvala' (htval provides all needed values) diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 98ad564d2b84084..6e586714af84d3c 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -20,6 +20,7 @@ // CHECK-NOT: __riscv_m {{.*$}} // CHECK-NOT: __riscv_mul {{.*$}} // CHECK-NOT: __riscv_muldiv {{.*$}} +// CHECK-NOT: __riscv_sha {{.*$}} // CHECK-NOT: __riscv_shcounterenw {{.*$}} // CHECK-NOT: __riscv_shgatpa {{.*$}} // CHECK-NOT: __riscv_shtvala {{.*$}} @@ -323,6 +324,14 @@ // CHECK-M-EXT: __riscv_mul 1 // CHECK-M-EXT: __riscv_muldiv 1 +// RUN: %clang --target=riscv32-unknown-linux-gnu \ +// RUN: -march=rv32isha -E -dM %s \ +// RUN: -o - | FileCheck 
--check-prefix=CHECK-SHA-EXT %s +// RUN: %clang --target=riscv64-unknown-linux-gnu \ +// RUN: -march=rv64isha -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-SHA-EXT %s +// CHECK-SHA-EXT: __riscv_sha 1000000{{$}} + // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32ishcounterenw -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-SHCOUNTERENW-EXT %s diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index ab58cdaa1b2f95d..6075a2289d473de 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -119,6 +119,7 @@ on support follow. ``E`` Supported (`See note <#riscv-rve-note>`__) ``H`` Assembly Support ``M`` Supported + ``Sha`` Supported ``Shcounterenw`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) ``Shgatpa`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) ``Shtvala`` Assembly Support (`See note <#riscv-profiles-extensions-note>`__) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 2580f09be3ad77b..f7215279940d693 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -183,6 +183,7 @@ Changes to the RISC-V Backend * The `Zacas` extension is no longer marked as experimental. * The `Smmpm`, `Smnpm`, `Ssnpm`, `Supm`, and `Sspm` pointer masking extensions are no longer marked as experimental. +* The `Sha` extension is now supported. 
Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 778df542022f226..559f0e5950edddb 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1029,6 +1029,13 @@ def FeatureStdExtSvpbmt : RISCVExtension<"svpbmt", 1, 0, "'Svpbmt' (Page-Based Memory Types)">; +def FeatureStdExtSha + : RISCVExtension<"sha", 1, 0, + "'Sha' (Augmented Hypervisor)", + [FeatureStdExtH, FeatureStdExtSsstateen, FeatureStdExtShcounterenw, + FeatureStdExtShvstvala, FeatureStdExtShtvala, FeatureStdExtShvstvecd, + FeatureStdExtShvsatpa, FeatureStdExtShgatpa]>; + // Pointer Masking extensions // A supervisor-level extension that provides pointer masking for the next lower diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 9be9ddd05ee2900..4cc9ef2ea0d7ff8 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -39,6 +39,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zicbom %s -o - | FileCheck --check-prefixes=CHECK,RV32ZICBOM %s ; RUN: llc -mtriple=riscv32 -mattr=+zicboz %s -o - | FileCheck --check-prefixes=CHECK,RV32ZICBOZ %s ; RUN: llc -mtriple=riscv32 -mattr=+zicbop %s -o - | FileCheck --check-prefixes=CHECK,RV32ZICBOP %s +; RUN: llc -mtriple=riscv32 -mattr=+sha %s -o - | FileCheck --check-prefixes=CHECK,RV32SHA %s ; RUN: llc -mtriple=riscv32 -mattr=+shcounterenw %s -o - | FileCheck --check-prefixes=CHECK,RV32SHCOUNTERENW %s ; RUN: llc -mtriple=riscv32 -mattr=+shgatpa %s -o - | FileCheck --check-prefixes=CHECK,RV32SHGATPA %s ; RUN: llc -mtriple=riscv32 -mattr=+shvsatpa %s -o - | FileCheck --check-prefixes=CHECK,RV32SHVSATPA %s @@ -178,6 +179,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+zicbom %s -o - | FileCheck --check-prefixes=CHECK,RV64ZICBOM %s ; RUN: llc -mtriple=riscv64 -mattr=+zicboz %s -o - | FileCheck --check-prefixes=CHECK,RV64ZICBOZ %s ; RUN: llc 
-mtriple=riscv64 -mattr=+zicbop %s -o - | FileCheck --check-prefixes=CHECK,RV64ZICBOP %s +; RUN: llc -mtriple=riscv64 -mattr=+sha %s -o - | FileCheck --check-prefixes=CHECK,RV64SHA %s ; RUN: llc -mtriple=riscv64 -mattr=+shcounterenw %s -o - | FileCheck --check-prefixes=CHECK,RV64SHCOUNTERENW %s ; RUN: llc -mtriple=riscv64 -mattr=+shgatpa %s -o - | FileCheck --check-prefixes=CHECK,RV64SHGATPA %s ; RUN: llc -mtriple=riscv64 -mattr=+shvsatpa %s -o - | FileCheck --check-prefixes=CHECK,RV64SHVSATPA %s @@ -333,6 +335,7 @@ ; RV32ZICBOM: .attribute 5, "rv32i2p1_zicbom1p0" ; RV32ZICBOZ: .attribute 5, "rv32i2p1_zicboz1p0" ; RV32ZICBOP: .attribute 5, "rv32i2p1_zicbop1p0" +; RV32SHA: .attribute 5, "rv32i2p1_h1p0_sha1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssstateen1p0" ; RV32SHCOUNTERENW: .attribute 5, "rv32i2p1_shcounterenw1p0" ; RV32SHGATPA: .attribute 5, "rv32i2p1_shgatpa1p0" ; RV32SHVSATPA: .attribute 5, "rv32i2p1_shvsatpa1p0" @@ -474,6 +477,7 @@ ; RV64ZAMA16B: .attribute 5, "rv64i2p1_zama16b1p0" ; RV64ZAWRS: .attribute 5, "rv64i2p1_zawrs1p0" ; RV64ZICBOP: .attribute 5, "rv64i2p1_zicbop1p0" +; RV64SHA: .attribute 5, "rv64i2p1_h1p0_sha1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssstateen1p0" ; RV64SHCOUNTERENW: .attribute 5, "rv64i2p1_shcounterenw1p0" ; RV64SHGATPA: .attribute 5, "rv64i2p1_shgatpa1p0" ; RV64SHVSATPA: .attribute 5, "rv64i2p1_shvsatpa1p0" diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 2e9c548ae872f39..e402b1a40de34d4 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -1055,6 +1055,7 @@ R"(All available -march extensions for RISC-V zvl8192b 1.0 zhinx 1.0 zhinxmin 1.0 + sha 1.0 shcounterenw 1.0 shgatpa 1.0 shtvala 1.0 From a4fd3dba6e285734bc635b0651a30dfeffedeada Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Mon, 28 Oct 2024 09:04:19 +0100 
Subject: [PATCH 138/425] [AMDGPU] Use wider loop lowering type for LowerMemIntrinsics (#112332) When llvm.memcpy or llvm.memmove intrinsics are lowered as a loop in LowerMemIntrinsics.cpp, the loop consists of a single load/store pair per iteration. We can improve performance in some cases by emitting multiple load/store pairs per iteration. This patch achieves that by increasing the width of the loop lowering type in the GCN target and letting legalization split the resulting too-wide access pairs into multiple legal access pairs. This change only affects lowered memcpys and memmoves with large (>= 1024 bytes) constant lengths. Smaller constant lengths are handled by ISel directly; non-constant lengths would be slowed down by this change if the dynamic length was smaller or slightly larger than what an unrolled iteration copies. The chosen default unroll factor is the result of microbenchmarks on gfx1030. This change leads to speedups of 15-38% for global memory and 1.9-5.8x for scratch in these microbenchmarks. Part of SWDEV-455845. --- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 38 +- .../CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll | 308 +- .../CodeGen/AMDGPU/lower-mem-intrinsics.ll | 284 +- .../CodeGen/AMDGPU/memintrinsic-unroll.ll | 16049 ++++++++++++++++ 4 files changed, 16445 insertions(+), 234 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 8f9495d83cde2dc..5160851f8c4424d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -75,6 +75,13 @@ static cl::opt InlineMaxBB( cl::desc("Maximum number of BBs allowed in a function after inlining" " (compile time constraint)")); +// This default unroll factor is based on microbenchmarks on gfx1030. 
+static cl::opt MemcpyLoopUnroll( + "amdgpu-memcpy-loop-unroll", + cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory " + "operations when lowering memcpy as a loop"), + cl::init(16), cl::Hidden); + static bool dependsOnLocalPhi(const Loop *L, const Value *Cond, unsigned Depth = 0) { const Instruction *I = dyn_cast(Cond); @@ -409,13 +416,8 @@ int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const { return 1024; } -// FIXME: Really we would like to issue multiple 128-bit loads and stores per -// iteration. Should we report a larger size and let it legalize? -// // FIXME: Should we use narrower types for local/region, or account for when // unaligned access is legal? -// -// FIXME: This could use fine tuning and microbenchmarks. Type *GCNTTIImpl::getMemcpyLoopLoweringType( LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, @@ -442,9 +444,22 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType( return FixedVectorType::get(Type::getInt32Ty(Context), 2); } - // Global memory works best with 16-byte accesses. Private memory will also - // hit this, although they'll be decomposed. - return FixedVectorType::get(Type::getInt32Ty(Context), 4); + // Global memory works best with 16-byte accesses. + // If the operation has a fixed known length that is large enough, it is + // worthwhile to return an even wider type and let legalization lower it into + // multiple accesses, effectively unrolling the memcpy loop. Private memory + // also hits this, although accesses may be decomposed. + // + // Don't unroll if Length is not a constant, since unrolling leads to worse + // performance for length values that are smaller or slightly larger than the + // total size of the type returned here. Mitigating that would require a more + // complex lowering for variable-length memcpy and memmove. 
+ unsigned I32EltsInVector = 4; + if (MemcpyLoopUnroll > 0 && isa(Length)) + return FixedVectorType::get(Type::getInt32Ty(Context), + MemcpyLoopUnroll * I32EltsInVector); + + return FixedVectorType::get(Type::getInt32Ty(Context), I32EltsInVector); } void GCNTTIImpl::getMemcpyLoopResidualLoweringType( @@ -452,7 +467,6 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType( unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional AtomicCpySize) const { - assert(RemainingBytes < 16); if (AtomicCpySize) BaseT::getMemcpyLoopResidualLoweringType( @@ -462,6 +476,12 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType( Align MinAlign = std::min(SrcAlign, DestAlign); if (MinAlign != Align(2)) { + Type *I32x4Ty = FixedVectorType::get(Type::getInt32Ty(Context), 4); + while (RemainingBytes >= 16) { + OpsOut.push_back(I32x4Ty); + RemainingBytes -= 16; + } + Type *I64Ty = Type::getInt64Ty(Context); while (RemainingBytes >= 8) { OpsOut.push_back(I64Ty); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll index 7f23434c9dfdd6c..75d4d8816fb30d2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=19 %s -o - | FileCheck -check-prefix=LOOP %s -; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=21 %s -o - | FileCheck -check-prefix=UNROLL %s +; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -amdgpu-memcpy-loop-unroll=2 -mem-intrinsic-expand-size=35 %s -o - | FileCheck -check-prefix=LOOP %s +; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -amdgpu-memcpy-loop-unroll=2 -mem-intrinsic-expand-size=37 %s -o - | FileCheck -check-prefix=UNROLL %s 
declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg) @@ -14,104 +14,176 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) ; LOOP-NEXT: v_mov_b32_e32 v4, s0 ; LOOP-NEXT: .LBB0_1: ; %load-store-loop ; LOOP-NEXT: ; =>This Inner Loop Header: Depth=1 +; LOOP-NEXT: s_waitcnt expcnt(2) +; LOOP-NEXT: v_add_i32_e32 v29, vcc, v2, v4 +; LOOP-NEXT: v_addc_u32_e32 v30, vcc, v3, v5, vcc +; LOOP-NEXT: buffer_load_ubyte v24, v[29:30], s[0:3], 0 addr64 +; LOOP-NEXT: buffer_load_ubyte v27, v[29:30], s[0:3], 0 addr64 offset:1 +; LOOP-NEXT: buffer_load_ubyte v34, v[29:30], s[0:3], 0 addr64 offset:2 +; LOOP-NEXT: buffer_load_ubyte v35, v[29:30], s[0:3], 0 addr64 offset:3 +; LOOP-NEXT: buffer_load_ubyte v36, v[29:30], s[0:3], 0 addr64 offset:4 +; LOOP-NEXT: buffer_load_ubyte v37, v[29:30], s[0:3], 0 addr64 offset:5 +; LOOP-NEXT: buffer_load_ubyte v38, v[29:30], s[0:3], 0 addr64 offset:6 +; LOOP-NEXT: buffer_load_ubyte v39, v[29:30], s[0:3], 0 addr64 offset:7 +; LOOP-NEXT: buffer_load_ubyte v6, v[29:30], s[0:3], 0 addr64 offset:8 +; LOOP-NEXT: buffer_load_ubyte v9, v[29:30], s[0:3], 0 addr64 offset:9 +; LOOP-NEXT: buffer_load_ubyte v10, v[29:30], s[0:3], 0 addr64 offset:10 ; LOOP-NEXT: s_waitcnt expcnt(0) -; LOOP-NEXT: v_add_i32_e32 v6, vcc, v2, v4 -; LOOP-NEXT: v_addc_u32_e32 v7, vcc, v3, v5, vcc -; LOOP-NEXT: v_add_i32_e32 v8, vcc, v0, v4 -; LOOP-NEXT: v_addc_u32_e32 v9, vcc, v1, v5, vcc -; LOOP-NEXT: v_add_i32_e32 v4, vcc, 16, v4 -; LOOP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; LOOP-NEXT: buffer_load_ubyte v10, v[6:7], s[0:3], 0 addr64 -; LOOP-NEXT: buffer_load_ubyte v11, v[6:7], s[0:3], 0 addr64 offset:1 -; LOOP-NEXT: buffer_load_ubyte v12, v[6:7], s[0:3], 0 addr64 offset:2 -; LOOP-NEXT: buffer_load_ubyte v13, v[6:7], s[0:3], 0 addr64 offset:3 -; LOOP-NEXT: buffer_load_ubyte v14, v[6:7], s[0:3], 0 addr64 offset:4 -; LOOP-NEXT: buffer_load_ubyte v15, v[6:7], s[0:3], 0 addr64 offset:5 -; LOOP-NEXT: 
buffer_load_ubyte v16, v[6:7], s[0:3], 0 addr64 offset:6 -; LOOP-NEXT: buffer_load_ubyte v17, v[6:7], s[0:3], 0 addr64 offset:7 -; LOOP-NEXT: buffer_load_ubyte v18, v[6:7], s[0:3], 0 addr64 offset:8 -; LOOP-NEXT: buffer_load_ubyte v19, v[6:7], s[0:3], 0 addr64 offset:9 -; LOOP-NEXT: buffer_load_ubyte v20, v[6:7], s[0:3], 0 addr64 offset:10 -; LOOP-NEXT: buffer_load_ubyte v21, v[6:7], s[0:3], 0 addr64 offset:11 -; LOOP-NEXT: buffer_load_ubyte v22, v[6:7], s[0:3], 0 addr64 offset:12 -; LOOP-NEXT: buffer_load_ubyte v23, v[6:7], s[0:3], 0 addr64 offset:13 -; LOOP-NEXT: buffer_load_ubyte v24, v[6:7], s[0:3], 0 addr64 offset:14 -; LOOP-NEXT: buffer_load_ubyte v6, v[6:7], s[0:3], 0 addr64 offset:15 -; LOOP-NEXT: v_cmp_gt_u32_e32 vcc, 16, v4 +; LOOP-NEXT: buffer_load_ubyte v11, v[29:30], s[0:3], 0 addr64 offset:11 +; LOOP-NEXT: buffer_load_ubyte v7, v[29:30], s[0:3], 0 addr64 offset:12 +; LOOP-NEXT: buffer_load_ubyte v13, v[29:30], s[0:3], 0 addr64 offset:13 +; LOOP-NEXT: buffer_load_ubyte v14, v[29:30], s[0:3], 0 addr64 offset:14 +; LOOP-NEXT: buffer_load_ubyte v15, v[29:30], s[0:3], 0 addr64 offset:15 +; LOOP-NEXT: buffer_load_ubyte v8, v[29:30], s[0:3], 0 addr64 offset:16 +; LOOP-NEXT: buffer_load_ubyte v17, v[29:30], s[0:3], 0 addr64 offset:17 +; LOOP-NEXT: buffer_load_ubyte v18, v[29:30], s[0:3], 0 addr64 offset:18 +; LOOP-NEXT: buffer_load_ubyte v19, v[29:30], s[0:3], 0 addr64 offset:19 +; LOOP-NEXT: buffer_load_ubyte v12, v[29:30], s[0:3], 0 addr64 offset:20 +; LOOP-NEXT: buffer_load_ubyte v21, v[29:30], s[0:3], 0 addr64 offset:21 +; LOOP-NEXT: buffer_load_ubyte v22, v[29:30], s[0:3], 0 addr64 offset:22 +; LOOP-NEXT: buffer_load_ubyte v23, v[29:30], s[0:3], 0 addr64 offset:23 +; LOOP-NEXT: buffer_load_ubyte v16, v[29:30], s[0:3], 0 addr64 offset:24 +; LOOP-NEXT: buffer_load_ubyte v25, v[29:30], s[0:3], 0 addr64 offset:25 +; LOOP-NEXT: buffer_load_ubyte v26, v[29:30], s[0:3], 0 addr64 offset:26 +; LOOP-NEXT: buffer_load_ubyte v28, v[29:30], s[0:3], 0 addr64 offset:27 
+; LOOP-NEXT: buffer_load_ubyte v20, v[29:30], s[0:3], 0 addr64 offset:28 +; LOOP-NEXT: buffer_load_ubyte v31, v[29:30], s[0:3], 0 addr64 offset:29 +; LOOP-NEXT: buffer_load_ubyte v32, v[29:30], s[0:3], 0 addr64 offset:30 +; LOOP-NEXT: buffer_load_ubyte v33, v[29:30], s[0:3], 0 addr64 offset:31 ; LOOP-NEXT: s_waitcnt vmcnt(14) -; LOOP-NEXT: v_lshlrev_b32_e32 v7, 8, v11 +; LOOP-NEXT: v_lshlrev_b32_e32 v27, 8, v27 +; LOOP-NEXT: v_or_b32_e32 v24, v27, v24 +; LOOP-NEXT: v_lshlrev_b32_e32 v27, 24, v35 +; LOOP-NEXT: v_lshlrev_b32_e32 v29, 16, v34 +; LOOP-NEXT: v_or_b32_e32 v27, v27, v29 +; LOOP-NEXT: v_lshlrev_b32_e32 v29, 8, v37 +; LOOP-NEXT: v_lshlrev_b32_e32 v30, 24, v39 +; LOOP-NEXT: v_lshlrev_b32_e32 v34, 16, v38 +; LOOP-NEXT: v_or_b32_e32 v29, v29, v36 +; LOOP-NEXT: v_or_b32_e32 v30, v30, v34 +; LOOP-NEXT: v_add_i32_e32 v34, vcc, v0, v4 +; LOOP-NEXT: v_addc_u32_e32 v35, vcc, v1, v5, vcc +; LOOP-NEXT: v_add_i32_e32 v4, vcc, 32, v4 +; LOOP-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc +; LOOP-NEXT: v_cmp_gt_u32_e32 vcc, 32, v4 +; LOOP-NEXT: v_lshlrev_b32_e32 v9, 8, v9 +; LOOP-NEXT: v_lshlrev_b32_e32 v11, 24, v11 +; LOOP-NEXT: v_lshlrev_b32_e32 v10, 16, v10 +; LOOP-NEXT: v_lshlrev_b32_e32 v13, 8, v13 +; LOOP-NEXT: v_lshlrev_b32_e32 v15, 24, v15 +; LOOP-NEXT: v_lshlrev_b32_e32 v14, 16, v14 +; LOOP-NEXT: v_lshlrev_b32_e32 v17, 8, v17 ; LOOP-NEXT: s_waitcnt vmcnt(12) -; LOOP-NEXT: v_lshlrev_b32_e32 v11, 24, v13 -; LOOP-NEXT: v_lshlrev_b32_e32 v12, 16, v12 +; LOOP-NEXT: v_lshlrev_b32_e32 v19, 24, v19 +; LOOP-NEXT: v_lshlrev_b32_e32 v18, 16, v18 ; LOOP-NEXT: s_waitcnt vmcnt(10) -; LOOP-NEXT: v_lshlrev_b32_e32 v13, 8, v15 +; LOOP-NEXT: v_lshlrev_b32_e32 v21, 8, v21 ; LOOP-NEXT: s_waitcnt vmcnt(8) -; LOOP-NEXT: v_lshlrev_b32_e32 v15, 24, v17 -; LOOP-NEXT: v_lshlrev_b32_e32 v16, 16, v16 +; LOOP-NEXT: v_lshlrev_b32_e32 v23, 24, v23 +; LOOP-NEXT: v_lshlrev_b32_e32 v22, 16, v22 ; LOOP-NEXT: s_waitcnt vmcnt(6) -; LOOP-NEXT: v_lshlrev_b32_e32 v17, 8, v19 +; LOOP-NEXT: v_lshlrev_b32_e32 
v25, 8, v25 ; LOOP-NEXT: s_waitcnt vmcnt(4) -; LOOP-NEXT: v_lshlrev_b32_e32 v19, 24, v21 -; LOOP-NEXT: v_lshlrev_b32_e32 v20, 16, v20 +; LOOP-NEXT: v_lshlrev_b32_e32 v28, 24, v28 +; LOOP-NEXT: v_lshlrev_b32_e32 v26, 16, v26 ; LOOP-NEXT: s_waitcnt vmcnt(2) -; LOOP-NEXT: v_lshlrev_b32_e32 v21, 8, v23 +; LOOP-NEXT: v_lshlrev_b32_e32 v31, 8, v31 ; LOOP-NEXT: s_waitcnt vmcnt(0) -; LOOP-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; LOOP-NEXT: v_lshlrev_b32_e32 v23, 16, v24 -; LOOP-NEXT: v_or_b32_e32 v7, v7, v10 -; LOOP-NEXT: v_or_b32_e32 v10, v11, v12 -; LOOP-NEXT: v_or_b32_e32 v11, v13, v14 -; LOOP-NEXT: v_or_b32_e32 v12, v15, v16 -; LOOP-NEXT: v_or_b32_e32 v13, v17, v18 -; LOOP-NEXT: v_or_b32_e32 v14, v19, v20 -; LOOP-NEXT: v_or_b32_e32 v15, v21, v22 -; LOOP-NEXT: v_or_b32_e32 v6, v6, v23 +; LOOP-NEXT: v_lshlrev_b32_e32 v33, 24, v33 +; LOOP-NEXT: v_lshlrev_b32_e32 v32, 16, v32 +; LOOP-NEXT: v_or_b32_e32 v6, v9, v6 +; LOOP-NEXT: v_or_b32_e32 v9, v11, v10 +; LOOP-NEXT: v_or_b32_e32 v7, v13, v7 +; LOOP-NEXT: v_or_b32_e32 v10, v15, v14 +; LOOP-NEXT: v_or_b32_e32 v8, v17, v8 +; LOOP-NEXT: v_or_b32_e32 v11, v19, v18 +; LOOP-NEXT: v_or_b32_e32 v12, v21, v12 +; LOOP-NEXT: v_or_b32_e32 v13, v23, v22 +; LOOP-NEXT: v_or_b32_e32 v14, v25, v16 +; LOOP-NEXT: v_or_b32_e32 v15, v28, v26 +; LOOP-NEXT: v_or_b32_e32 v16, v31, v20 +; LOOP-NEXT: v_or_b32_e32 v17, v33, v32 +; LOOP-NEXT: v_or_b32_e32 v18, v27, v24 +; LOOP-NEXT: v_or_b32_e32 v19, v30, v29 +; LOOP-NEXT: v_or_b32_e32 v6, v9, v6 ; LOOP-NEXT: v_or_b32_e32 v7, v10, v7 -; LOOP-NEXT: v_or_b32_e32 v10, v12, v11 -; LOOP-NEXT: v_or_b32_e32 v11, v14, v13 -; LOOP-NEXT: v_or_b32_e32 v6, v6, v15 -; LOOP-NEXT: v_lshrrev_b32_e32 v12, 16, v7 -; LOOP-NEXT: v_bfe_u32 v13, v7, 8, 8 -; LOOP-NEXT: buffer_store_byte v7, v[8:9], s[0:3], 0 addr64 +; LOOP-NEXT: v_or_b32_e32 v8, v11, v8 +; LOOP-NEXT: v_or_b32_e32 v9, v13, v12 +; LOOP-NEXT: v_or_b32_e32 v10, v15, v14 +; LOOP-NEXT: v_or_b32_e32 v11, v17, v16 +; LOOP-NEXT: v_lshrrev_b32_e32 v12, 16, v18 +; 
LOOP-NEXT: v_bfe_u32 v13, v18, 8, 8 +; LOOP-NEXT: buffer_store_byte v18, v[34:35], s[0:3], 0 addr64 +; LOOP-NEXT: v_lshrrev_b32_e32 v14, 24, v18 +; LOOP-NEXT: v_lshrrev_b32_e32 v15, 16, v19 +; LOOP-NEXT: v_bfe_u32 v16, v19, 8, 8 +; LOOP-NEXT: buffer_store_byte v19, v[34:35], s[0:3], 0 addr64 offset:4 +; LOOP-NEXT: v_lshrrev_b32_e32 v17, 24, v19 +; LOOP-NEXT: s_waitcnt expcnt(1) +; LOOP-NEXT: v_lshrrev_b32_e32 v18, 16, v6 +; LOOP-NEXT: s_waitcnt expcnt(0) +; LOOP-NEXT: v_bfe_u32 v19, v6, 8, 8 +; LOOP-NEXT: buffer_store_byte v6, v[34:35], s[0:3], 0 addr64 offset:8 +; LOOP-NEXT: s_waitcnt expcnt(0) +; LOOP-NEXT: v_lshrrev_b32_e32 v6, 24, v6 +; LOOP-NEXT: v_lshrrev_b32_e32 v20, 16, v7 +; LOOP-NEXT: v_bfe_u32 v21, v7, 8, 8 +; LOOP-NEXT: buffer_store_byte v7, v[34:35], s[0:3], 0 addr64 offset:12 ; LOOP-NEXT: s_waitcnt expcnt(0) ; LOOP-NEXT: v_lshrrev_b32_e32 v7, 24, v7 -; LOOP-NEXT: v_lshrrev_b32_e32 v14, 16, v10 -; LOOP-NEXT: v_bfe_u32 v15, v10, 8, 8 -; LOOP-NEXT: buffer_store_byte v10, v[8:9], s[0:3], 0 addr64 offset:4 +; LOOP-NEXT: v_lshrrev_b32_e32 v22, 16, v8 +; LOOP-NEXT: v_bfe_u32 v23, v8, 8, 8 +; LOOP-NEXT: buffer_store_byte v8, v[34:35], s[0:3], 0 addr64 offset:16 +; LOOP-NEXT: s_waitcnt expcnt(0) +; LOOP-NEXT: v_lshrrev_b32_e32 v8, 24, v8 +; LOOP-NEXT: v_lshrrev_b32_e32 v24, 16, v9 +; LOOP-NEXT: v_bfe_u32 v25, v9, 8, 8 +; LOOP-NEXT: buffer_store_byte v9, v[34:35], s[0:3], 0 addr64 offset:20 +; LOOP-NEXT: s_waitcnt expcnt(0) +; LOOP-NEXT: v_lshrrev_b32_e32 v9, 24, v9 +; LOOP-NEXT: v_lshrrev_b32_e32 v26, 16, v10 +; LOOP-NEXT: v_bfe_u32 v27, v10, 8, 8 +; LOOP-NEXT: buffer_store_byte v10, v[34:35], s[0:3], 0 addr64 offset:24 ; LOOP-NEXT: s_waitcnt expcnt(0) ; LOOP-NEXT: v_lshrrev_b32_e32 v10, 24, v10 -; LOOP-NEXT: v_lshrrev_b32_e32 v16, 16, v11 -; LOOP-NEXT: v_bfe_u32 v17, v11, 8, 8 -; LOOP-NEXT: buffer_store_byte v11, v[8:9], s[0:3], 0 addr64 offset:8 +; LOOP-NEXT: v_lshrrev_b32_e32 v28, 16, v11 +; LOOP-NEXT: v_bfe_u32 v29, v11, 8, 8 +; LOOP-NEXT: 
buffer_store_byte v11, v[34:35], s[0:3], 0 addr64 offset:28 ; LOOP-NEXT: s_waitcnt expcnt(0) ; LOOP-NEXT: v_lshrrev_b32_e32 v11, 24, v11 -; LOOP-NEXT: v_lshrrev_b32_e32 v18, 16, v6 -; LOOP-NEXT: v_bfe_u32 v19, v6, 8, 8 -; LOOP-NEXT: buffer_store_byte v6, v[8:9], s[0:3], 0 addr64 offset:12 -; LOOP-NEXT: s_waitcnt expcnt(0) -; LOOP-NEXT: v_lshrrev_b32_e32 v6, 24, v6 -; LOOP-NEXT: buffer_store_byte v13, v[8:9], s[0:3], 0 addr64 offset:1 -; LOOP-NEXT: buffer_store_byte v12, v[8:9], s[0:3], 0 addr64 offset:2 -; LOOP-NEXT: buffer_store_byte v7, v[8:9], s[0:3], 0 addr64 offset:3 -; LOOP-NEXT: buffer_store_byte v15, v[8:9], s[0:3], 0 addr64 offset:5 -; LOOP-NEXT: buffer_store_byte v14, v[8:9], s[0:3], 0 addr64 offset:6 -; LOOP-NEXT: buffer_store_byte v10, v[8:9], s[0:3], 0 addr64 offset:7 -; LOOP-NEXT: buffer_store_byte v17, v[8:9], s[0:3], 0 addr64 offset:9 -; LOOP-NEXT: buffer_store_byte v16, v[8:9], s[0:3], 0 addr64 offset:10 -; LOOP-NEXT: buffer_store_byte v11, v[8:9], s[0:3], 0 addr64 offset:11 -; LOOP-NEXT: buffer_store_byte v19, v[8:9], s[0:3], 0 addr64 offset:13 -; LOOP-NEXT: buffer_store_byte v18, v[8:9], s[0:3], 0 addr64 offset:14 -; LOOP-NEXT: buffer_store_byte v6, v[8:9], s[0:3], 0 addr64 offset:15 +; LOOP-NEXT: buffer_store_byte v13, v[34:35], s[0:3], 0 addr64 offset:1 +; LOOP-NEXT: buffer_store_byte v12, v[34:35], s[0:3], 0 addr64 offset:2 +; LOOP-NEXT: buffer_store_byte v14, v[34:35], s[0:3], 0 addr64 offset:3 +; LOOP-NEXT: buffer_store_byte v16, v[34:35], s[0:3], 0 addr64 offset:5 +; LOOP-NEXT: buffer_store_byte v15, v[34:35], s[0:3], 0 addr64 offset:6 +; LOOP-NEXT: buffer_store_byte v17, v[34:35], s[0:3], 0 addr64 offset:7 +; LOOP-NEXT: buffer_store_byte v19, v[34:35], s[0:3], 0 addr64 offset:9 +; LOOP-NEXT: buffer_store_byte v18, v[34:35], s[0:3], 0 addr64 offset:10 +; LOOP-NEXT: buffer_store_byte v6, v[34:35], s[0:3], 0 addr64 offset:11 +; LOOP-NEXT: buffer_store_byte v21, v[34:35], s[0:3], 0 addr64 offset:13 +; LOOP-NEXT: buffer_store_byte v20, 
v[34:35], s[0:3], 0 addr64 offset:14 +; LOOP-NEXT: buffer_store_byte v7, v[34:35], s[0:3], 0 addr64 offset:15 +; LOOP-NEXT: buffer_store_byte v23, v[34:35], s[0:3], 0 addr64 offset:17 +; LOOP-NEXT: buffer_store_byte v22, v[34:35], s[0:3], 0 addr64 offset:18 +; LOOP-NEXT: buffer_store_byte v8, v[34:35], s[0:3], 0 addr64 offset:19 +; LOOP-NEXT: buffer_store_byte v25, v[34:35], s[0:3], 0 addr64 offset:21 +; LOOP-NEXT: buffer_store_byte v24, v[34:35], s[0:3], 0 addr64 offset:22 +; LOOP-NEXT: buffer_store_byte v9, v[34:35], s[0:3], 0 addr64 offset:23 +; LOOP-NEXT: buffer_store_byte v27, v[34:35], s[0:3], 0 addr64 offset:25 +; LOOP-NEXT: buffer_store_byte v26, v[34:35], s[0:3], 0 addr64 offset:26 +; LOOP-NEXT: buffer_store_byte v10, v[34:35], s[0:3], 0 addr64 offset:27 +; LOOP-NEXT: buffer_store_byte v29, v[34:35], s[0:3], 0 addr64 offset:29 +; LOOP-NEXT: buffer_store_byte v28, v[34:35], s[0:3], 0 addr64 offset:30 +; LOOP-NEXT: buffer_store_byte v11, v[34:35], s[0:3], 0 addr64 offset:31 ; LOOP-NEXT: s_cbranch_vccnz .LBB0_1 ; LOOP-NEXT: ; %bb.2: ; %memcpy-split ; LOOP-NEXT: s_mov_b32 s2, 0 ; LOOP-NEXT: s_mov_b32 s3, 0xf000 ; LOOP-NEXT: s_mov_b64 s[0:1], 0 -; LOOP-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:17 -; LOOP-NEXT: buffer_load_ubyte v5, v[2:3], s[0:3], 0 addr64 offset:19 -; LOOP-NEXT: s_waitcnt expcnt(0) -; LOOP-NEXT: buffer_load_ubyte v6, v[2:3], s[0:3], 0 addr64 offset:18 -; LOOP-NEXT: buffer_load_ubyte v2, v[2:3], s[0:3], 0 addr64 offset:16 +; LOOP-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:33 +; LOOP-NEXT: buffer_load_ubyte v5, v[2:3], s[0:3], 0 addr64 offset:35 +; LOOP-NEXT: buffer_load_ubyte v6, v[2:3], s[0:3], 0 addr64 offset:34 +; LOOP-NEXT: buffer_load_ubyte v2, v[2:3], s[0:3], 0 addr64 offset:32 ; LOOP-NEXT: s_waitcnt vmcnt(3) ; LOOP-NEXT: v_lshlrev_b32_e32 v3, 8, v4 ; LOOP-NEXT: s_waitcnt vmcnt(2) @@ -124,12 +196,12 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) ; LOOP-NEXT: 
v_or_b32_e32 v2, v3, v2 ; LOOP-NEXT: v_lshrrev_b32_e32 v3, 16, v2 ; LOOP-NEXT: v_bfe_u32 v4, v2, 8, 8 -; LOOP-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:16 +; LOOP-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:32 ; LOOP-NEXT: s_waitcnt expcnt(0) ; LOOP-NEXT: v_lshrrev_b32_e32 v2, 24, v2 -; LOOP-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:17 -; LOOP-NEXT: buffer_store_byte v3, v[0:1], s[0:3], 0 addr64 offset:18 -; LOOP-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:19 +; LOOP-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:33 +; LOOP-NEXT: buffer_store_byte v3, v[0:1], s[0:3], 0 addr64 offset:34 +; LOOP-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:35 ; LOOP-NEXT: s_endpgm ; ; UNROLL-LABEL: memcpy_p1i8: @@ -212,11 +284,75 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) ; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:18 ; UNROLL-NEXT: s_waitcnt vmcnt(0) ; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:18 -; UNROLL-NEXT: buffer_load_ubyte v2, v[2:3], s[0:3], 0 addr64 offset:19 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:19 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:19 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:20 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:20 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:21 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:21 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:22 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], 
s[0:3], 0 addr64 offset:22 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:23 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:23 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:24 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:24 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:25 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:25 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:26 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:26 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:27 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:27 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:28 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:28 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:29 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:29 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:30 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:30 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:31 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:31 +; UNROLL-NEXT: 
s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:32 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:32 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:33 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:33 +; UNROLL-NEXT: s_waitcnt expcnt(0) +; UNROLL-NEXT: buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:34 +; UNROLL-NEXT: s_waitcnt vmcnt(0) +; UNROLL-NEXT: buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:34 +; UNROLL-NEXT: buffer_load_ubyte v2, v[2:3], s[0:3], 0 addr64 offset:35 ; UNROLL-NEXT: s_waitcnt vmcnt(0) -; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:19 +; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:35 ; UNROLL-NEXT: s_endpgm - call void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 20, i1 false) + call void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 36, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll index a95f22507eece3c..ffe9e06c04ae453 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll @@ -46,10 +46,10 @@ define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1) ; ALL: load-store-loop: ; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1 +; ALL-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 1 ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: store 
<4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 -; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 +; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; ALL: memcpy-split: @@ -66,10 +66,10 @@ define amdgpu_kernel void @min_size_large_static_memcpy_caller0(ptr addrspace(1) ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 1 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -93,20 +93,20 @@ define amdgpu_kernel void @max_size_small_static_memmove_caller0(ptr addrspace(1 ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]] ; ALL: memmove_bwd_loop: ; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1024, [[TMP0:%.*]] ] -; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP1]], 16 +; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP1]], 256 ; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]] -; ALL-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP2]], 
align 1 +; ALL-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP2]], align 1 ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 +; ALL-NEXT: store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; ALL-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; ALL: memmove_fwd_loop: ; ALL-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP7:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ] ; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]] -; ALL-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1 +; ALL-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP5]], align 1 ; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1 -; ALL-NEXT: [[TMP7]] = add i64 [[FWD_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1 +; ALL-NEXT: [[TMP7]] = add i64 [[FWD_INDEX]], 256 ; ALL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 1024 ; ALL-NEXT: br i1 [[TMP8]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]] ; ALL: memmove_done: @@ -128,20 +128,20 @@ define amdgpu_kernel void @min_size_large_static_memmove_caller0(ptr addrspace(1 ; OPT-NEXT: br label [[MEMMOVE_BWD_LOOP:%.*]] ; OPT: memmove_bwd_loop: ; OPT-NEXT: [[TMP4:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1024, [[MEMMOVE_BWD_RESIDUAL]] ] -; OPT-NEXT: [[BWD_INDEX]] = sub i64 [[TMP4]], 16 +; OPT-NEXT: [[BWD_INDEX]] = sub i64 [[TMP4]], 256 ; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]] -; OPT-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1 +; OPT-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, 
ptr addrspace(1) [[TMP5]], align 1 ; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]] -; OPT-NEXT: store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP6]], align 1 +; OPT-NEXT: store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP6]], align 1 ; OPT-NEXT: [[TMP7:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; OPT-NEXT: br i1 [[TMP7]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; OPT: memmove_fwd_loop: ; OPT-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP10:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0:%.*]] ] ; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]] -; OPT-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP8]], align 1 +; OPT-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP8]], align 1 ; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]] -; OPT-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP9]], align 1 -; OPT-NEXT: [[TMP10]] = add i64 [[FWD_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP9]], align 1 +; OPT-NEXT: [[TMP10]] = add i64 [[FWD_INDEX]], 256 ; OPT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[TMP10]], 1024 ; OPT-NEXT: br i1 [[TMP11]], label [[MEMMOVE_FWD_RESIDUAL:%.*]], label [[MEMMOVE_FWD_LOOP]] ; OPT: memmove_fwd_residual: @@ -421,17 +421,30 @@ define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspac ; ALL-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] ; ALL-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] ; ALL: post-loop-memcpy-expansion: -; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]] -; ALL: load-store-loop: -; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP19:%.*]], [[LOAD_STORE_LOOP]] ] -; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX]] +; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
i8, ptr addrspace(1) [[SRC]], i64 0 ; ALL-NEXT: [[TMP17:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP16]], align 1 -; ALL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX]] +; ALL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1:%.*]], i64 0 ; ALL-NEXT: store <4 x i32> [[TMP17]], ptr addrspace(1) [[TMP18]], align 1 -; ALL-NEXT: [[TMP19]] = add i64 [[LOOP_INDEX]], 16 -; ALL-NEXT: [[TMP20:%.*]] = icmp ult i64 [[TMP19]], 96 -; ALL-NEXT: br i1 [[TMP20]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] -; ALL: memcpy-split: +; ALL-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 16 +; ALL-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP33]], align 1 +; ALL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 16 +; ALL-NEXT: store <4 x i32> [[TMP19]], ptr addrspace(1) [[TMP20]], align 1 +; ALL-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 32 +; ALL-NEXT: [[TMP35:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP34]], align 1 +; ALL-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 32 +; ALL-NEXT: store <4 x i32> [[TMP35]], ptr addrspace(1) [[TMP36]], align 1 +; ALL-NEXT: [[TMP37:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 48 +; ALL-NEXT: [[TMP38:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP37]], align 1 +; ALL-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 48 +; ALL-NEXT: store <4 x i32> [[TMP38]], ptr addrspace(1) [[TMP39]], align 1 +; ALL-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 64 +; ALL-NEXT: [[TMP28:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP40]], align 1 +; ALL-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 64 +; ALL-NEXT: store <4 x i32> [[TMP28]], ptr addrspace(1) [[TMP29]], align 1 +; ALL-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
i8, ptr addrspace(1) [[SRC]], i64 80 +; ALL-NEXT: [[TMP31:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP30]], align 1 +; ALL-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 80 +; ALL-NEXT: store <4 x i32> [[TMP31]], ptr addrspace(1) [[TMP32]], align 1 ; ALL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 96 ; ALL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) [[TMP21]], align 1 ; ALL-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 96 @@ -456,10 +469,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -479,10 +492,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr 
addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -502,10 +515,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -525,10 +538,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, 
ptr addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -548,10 +561,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -575,10 +588,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x 
i32>, ptr addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -606,10 +619,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -633,10 +646,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load 
<64 x i32>, ptr addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -691,10 +704,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(ptr addrspace ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -764,10 +777,10 @@ define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(ptr addrspa ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = 
load <64 x i32>, ptr addrspace(5) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -814,10 +827,10 @@ define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(ptr addrspa ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 4 +; OPT-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP1]], align 4 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1 +; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -864,10 +877,10 @@ define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(ptr addrspa ; OPT: load-store-loop: ; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 1 +; OPT-NEXT: 
[[TMP2:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP1]], align 1 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4 +; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 256 ; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 1024 ; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: @@ -1194,17 +1207,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_16(ptr addrspace(1 ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @memcpy_global_align4_global_align4_16( -; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]] -; ALL: load-store-loop: -; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] +; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 0 ; ALL-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 -; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] +; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 0 ; ALL-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 -; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 16 -; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] -; ALL: memcpy-split: ; ALL-NEXT: ret void ; call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 16, i1 false) @@ -1326,20 +1332,20 @@ define amdgpu_kernel void @memmove_flat_align1_global_align1(ptr %dst, ptr addrs ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], 
label [[MEMMOVE_FWD_LOOP:%.*]] ; ALL: memmove_bwd_loop: ; ALL-NEXT: [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ] -; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP2]], 16 +; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP2]], 256 ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]] -; ALL-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP3]], align 1 +; ALL-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[BWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT]], ptr [[TMP4]], align 1 +; ALL-NEXT: store <64 x i32> [[ELEMENT]], ptr [[TMP4]], align 1 ; ALL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; ALL-NEXT: br i1 [[TMP5]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; ALL: memmove_fwd_loop: ; ALL-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP8:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ] ; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]] -; ALL-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP6]], align 1 +; ALL-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP6]], align 1 ; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[FWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT1]], ptr [[TMP7]], align 1 -; ALL-NEXT: [[TMP8]] = add i64 [[FWD_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[ELEMENT1]], ptr [[TMP7]], align 1 +; ALL-NEXT: [[TMP8]] = add i64 [[FWD_INDEX]], 256 ; ALL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 256 ; ALL-NEXT: br i1 [[TMP9]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]] ; ALL: memmove_done: @@ -1360,20 +1366,20 @@ define amdgpu_kernel void @memmove_global_align1_flat_align1(ptr addrspace(1) %d ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]] ; ALL: memmove_bwd_loop: ; ALL-NEXT: [[TMP2:%.*]] = phi i64 [ 
[[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ] -; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP2]], 16 +; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP2]], 256 ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[BWD_INDEX]] -; ALL-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1 +; ALL-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, ptr [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP4]], align 1 +; ALL-NEXT: store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP4]], align 1 ; ALL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; ALL-NEXT: br i1 [[TMP5]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; ALL: memmove_fwd_loop: ; ALL-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP8:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ] ; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[FWD_INDEX]] -; ALL-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 1 +; ALL-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr [[TMP6]], align 1 ; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP7]], align 1 -; ALL-NEXT: [[TMP8]] = add i64 [[FWD_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP7]], align 1 +; ALL-NEXT: [[TMP8]] = add i64 [[FWD_INDEX]], 256 ; ALL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 256 ; ALL-NEXT: br i1 [[TMP9]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]] ; ALL: memmove_done: @@ -1394,20 +1400,20 @@ define amdgpu_kernel void @memmove_flat_align1_private_align1(ptr %dst, ptr addr ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]] ; ALL: memmove_bwd_loop: ; ALL-NEXT: [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ] -; ALL-NEXT: [[BWD_INDEX]] 
= sub i64 [[TMP2]], 16 +; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP2]], 256 ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i64 [[BWD_INDEX]] -; ALL-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP3]], align 1 +; ALL-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[BWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT]], ptr [[TMP4]], align 1 +; ALL-NEXT: store <64 x i32> [[ELEMENT]], ptr [[TMP4]], align 1 ; ALL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; ALL-NEXT: br i1 [[TMP5]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; ALL: memmove_fwd_loop: ; ALL-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP8:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ] ; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i64 [[FWD_INDEX]] -; ALL-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP6]], align 1 +; ALL-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP6]], align 1 ; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[FWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT1]], ptr [[TMP7]], align 1 -; ALL-NEXT: [[TMP8]] = add i64 [[FWD_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[ELEMENT1]], ptr [[TMP7]], align 1 +; ALL-NEXT: [[TMP8]] = add i64 [[FWD_INDEX]], 256 ; ALL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 256 ; ALL-NEXT: br i1 [[TMP9]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]] ; ALL: memmove_done: @@ -1428,20 +1434,20 @@ define amdgpu_kernel void @memmove_private_align1_flat_align1(ptr addrspace(5) % ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]] ; ALL: memmove_bwd_loop: ; ALL-NEXT: [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ] -; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP2]], 16 +; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP2]], 256 ; ALL-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[BWD_INDEX]] -; ALL-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1 +; ALL-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, ptr [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i64 [[BWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT]], ptr addrspace(5) [[TMP4]], align 1 +; ALL-NEXT: store <64 x i32> [[ELEMENT]], ptr addrspace(5) [[TMP4]], align 1 ; ALL-NEXT: [[TMP5:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; ALL-NEXT: br i1 [[TMP5]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; ALL: memmove_fwd_loop: ; ALL-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP8:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ] ; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[FWD_INDEX]] -; ALL-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 1 +; ALL-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr [[TMP6]], align 1 ; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i64 [[FWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(5) [[TMP7]], align 1 -; ALL-NEXT: [[TMP8]] = add i64 [[FWD_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[ELEMENT1]], ptr addrspace(5) [[TMP7]], align 1 +; ALL-NEXT: [[TMP8]] = add i64 [[FWD_INDEX]], 256 ; ALL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 256 ; ALL-NEXT: br i1 [[TMP9]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]] ; ALL: memmove_done: @@ -1461,10 +1467,10 @@ define amdgpu_kernel void @memmove_private_align1_global_align1(ptr addrspace(5) ; ALL: load-store-loop: ; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1, !alias.scope [[META0:![0-9]+]] +; ALL-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 1, !alias.scope 
[[META0:![0-9]+]] ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1, !noalias [[META0]] -; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1, !noalias [[META0]] +; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 256 ; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; ALL: memcpy-split: @@ -1484,10 +1490,10 @@ define amdgpu_kernel void @memmove_global_align1_private_align1(ptr addrspace(1) ; ALL: load-store-loop: ; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 1, !alias.scope [[META3:![0-9]+]] +; ALL-NEXT: [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP1]], align 1, !alias.scope [[META3:![0-9]+]] ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1, !noalias [[META3]] -; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1, !noalias [[META3]] +; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 256 ; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; ALL: memcpy-split: @@ -2144,20 +2150,20 @@ define amdgpu_kernel void @memmove_private_align1_private_align1(ptr addrspace(5 ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]] ; ALL: memmove_bwd_loop: ; ALL-NEXT: [[TMP1:%.*]] = phi i32 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, 
[[TMP0:%.*]] ] -; ALL-NEXT: [[BWD_INDEX]] = sub i32 [[TMP1]], 16 +; ALL-NEXT: [[BWD_INDEX]] = sub i32 [[TMP1]], 256 ; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 [[BWD_INDEX]] -; ALL-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP2]], align 1 +; ALL-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP2]], align 1 ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 [[BWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT]], ptr addrspace(5) [[TMP3]], align 1 +; ALL-NEXT: store <64 x i32> [[ELEMENT]], ptr addrspace(5) [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = icmp eq i32 [[BWD_INDEX]], 0 ; ALL-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; ALL: memmove_fwd_loop: ; ALL-NEXT: [[FWD_INDEX:%.*]] = phi i32 [ [[TMP7:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ] ; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 [[FWD_INDEX]] -; ALL-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP5]], align 1 +; ALL-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP5]], align 1 ; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 [[FWD_INDEX]] -; ALL-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(5) [[TMP6]], align 1 -; ALL-NEXT: [[TMP7]] = add i32 [[FWD_INDEX]], 16 +; ALL-NEXT: store <64 x i32> [[ELEMENT1]], ptr addrspace(5) [[TMP6]], align 1 +; ALL-NEXT: [[TMP7]] = add i32 [[FWD_INDEX]], 256 ; ALL-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 256 ; ALL-NEXT: br i1 [[TMP8]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]] ; ALL: memmove_done: @@ -2231,27 +2237,27 @@ define amdgpu_kernel void @memmove_global_align4_static_residual_empty(ptr addrs ; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]] ; OPT-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]] ; OPT: memmove_bwd_loop: -; OPT-NEXT: 
[[TMP1:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1040, [[TMP0:%.*]] ] -; OPT-NEXT: [[BWD_INDEX]] = sub i64 [[TMP1]], 16 +; OPT-NEXT: [[TMP11:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1280, [[TMP0:%.*]] ] +; OPT-NEXT: [[BWD_INDEX]] = sub i64 [[TMP11]], 256 ; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]] -; OPT-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP2]], align 1 +; OPT-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP2]], align 1 ; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]] -; OPT-NEXT: store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 +; OPT-NEXT: store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 ; OPT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; OPT-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; OPT: memmove_fwd_loop: ; OPT-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP7:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ] ; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]] -; OPT-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1 +; OPT-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP5]], align 1 ; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]] -; OPT-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1 -; OPT-NEXT: [[TMP7]] = add i64 [[FWD_INDEX]], 16 -; OPT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 1040 +; OPT-NEXT: store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1 +; OPT-NEXT: [[TMP7]] = add i64 [[FWD_INDEX]], 256 +; OPT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 1280 ; OPT-NEXT: br i1 [[TMP8]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]] ; OPT: memmove_done: ; OPT-NEXT: ret void ; - call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr 
addrspace(1) %src, i64 1040, i1 false) + call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1280, i1 false) ret void } @@ -2279,20 +2285,20 @@ define amdgpu_kernel void @memmove_global_align4_static_residual_full(ptr addrsp ; OPT-NEXT: br label [[MEMMOVE_BWD_LOOP:%.*]] ; OPT: memmove_bwd_loop: ; OPT-NEXT: [[TMP13:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1024, [[MEMMOVE_BWD_RESIDUAL]] ] -; OPT-NEXT: [[BWD_INDEX]] = sub i64 [[TMP13]], 16 +; OPT-NEXT: [[BWD_INDEX]] = sub i64 [[TMP13]], 256 ; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]] -; OPT-NEXT: [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP14]], align 1 +; OPT-NEXT: [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP14]], align 1 ; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]] -; OPT-NEXT: store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP15]], align 1 +; OPT-NEXT: store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP15]], align 1 ; OPT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; OPT-NEXT: br i1 [[TMP16]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; OPT: memmove_fwd_loop: ; OPT-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP19:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0:%.*]] ] ; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]] -; OPT-NEXT: [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP17]], align 1 +; OPT-NEXT: [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP17]], align 1 ; OPT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]] -; OPT-NEXT: store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP18]], align 1 -; OPT-NEXT: [[TMP19]] = add i64 [[FWD_INDEX]], 16 +; OPT-NEXT: store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP18]], align 1 +; OPT-NEXT: [[TMP19]] = add i64 [[FWD_INDEX]], 256 ; OPT-NEXT: [[TMP20:%.*]] = icmp 
eq i64 [[TMP19]], 1024 ; OPT-NEXT: br i1 [[TMP20]], label [[MEMMOVE_FWD_RESIDUAL:%.*]], label [[MEMMOVE_FWD_LOOP]] ; OPT: memmove_fwd_residual: @@ -2363,40 +2369,40 @@ entry: define amdgpu_kernel void @memmove_volatile(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { ; MAX1024-LABEL: @memmove_volatile( -; MAX1024-NEXT: call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 64, i1 true) +; MAX1024-NEXT: call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 512, i1 true) ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @memmove_volatile( ; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]] ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]] ; ALL: memmove_bwd_loop: -; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 64, [[TMP0:%.*]] ] -; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP1]], 16 +; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 512, [[TMP0:%.*]] ] +; ALL-NEXT: [[BWD_INDEX]] = sub i64 [[TMP1]], 256 ; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]] -; ALL-NEXT: [[ELEMENT:%.*]] = load volatile <4 x i32>, ptr addrspace(1) [[TMP2]], align 1 +; ALL-NEXT: [[ELEMENT:%.*]] = load volatile <64 x i32>, ptr addrspace(1) [[TMP2]], align 1 ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]] -; ALL-NEXT: store volatile <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 +; ALL-NEXT: store volatile <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[BWD_INDEX]], 0 ; ALL-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]] ; ALL: memmove_fwd_loop: ; ALL-NEXT: [[FWD_INDEX:%.*]] = phi i64 [ [[TMP7:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ] ; ALL-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]] -; ALL-NEXT: [[ELEMENT1:%.*]] = load volatile <4 x i32>, ptr addrspace(1) [[TMP5]], align 1 +; ALL-NEXT: [[ELEMENT1:%.*]] = load volatile <64 x i32>, ptr addrspace(1) [[TMP5]], align 1 ; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]] -; ALL-NEXT: store volatile <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1 -; ALL-NEXT: [[TMP7]] = add i64 [[FWD_INDEX]], 16 -; ALL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 64 +; ALL-NEXT: store volatile <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1 +; ALL-NEXT: [[TMP7]] = add i64 [[FWD_INDEX]], 256 +; ALL-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 512 ; ALL-NEXT: br i1 [[TMP8]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]] ; ALL: memmove_done: ; ALL-NEXT: ret void ; - call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 64, i1 true) + call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 512, i1 true) ret void } define amdgpu_kernel void @memcpy_volatile(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { ; MAX1024-LABEL: @memcpy_volatile( -; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 64, i1 true) +; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 512, i1 true) ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @memcpy_volatile( @@ -2404,16 +2410,16 @@ define amdgpu_kernel void @memcpy_volatile(ptr addrspace(1) %dst, ptr addrspace( ; ALL: load-store-loop: ; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] ; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: [[TMP2:%.*]] = load volatile <4 x i32>, ptr addrspace(1) [[TMP1]], align 1 +; ALL-NEXT: [[TMP2:%.*]] = load volatile <64 x i32>, ptr addrspace(1) [[TMP1]], 
align 1 ; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: store volatile <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 -; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 16 -; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64 +; ALL-NEXT: store volatile <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 +; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 +; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 512 ; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; ALL: memcpy-split: ; ALL-NEXT: ret void ; - call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 64, i1 true) + call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 512, i1 true) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll new file mode 100644 index 000000000000000..565fce0e7abdeae --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll @@ -0,0 +1,16049 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=-unaligned-access-mode %s -o - | FileCheck -check-prefix=ALIGNED %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -amdgpu-memcpy-loop-unroll=3 %s -o - | FileCheck -check-prefix=UNROLL3 %s + +; For checking that LowerMemIntrinsics lowers memcpy and memmove with large +; constant copy-sizes into loops with multiple load/store pairs. 
+ + +; memcpy for address spaces 0, 1, 4, 5 + +define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { +; CHECK-LABEL: memcpy_p0_p0_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: .LBB0_1: ; %load-store-loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: s_clause 0xf +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[96:97] offset:224 +; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[96:97] offset:240 +; CHECK-NEXT: flat_load_dwordx4 v[12:15], v[96:97] offset:192 +; CHECK-NEXT: flat_load_dwordx4 v[16:19], v[96:97] offset:208 +; CHECK-NEXT: flat_load_dwordx4 v[20:23], v[96:97] offset:160 +; CHECK-NEXT: flat_load_dwordx4 v[24:27], v[96:97] offset:176 +; CHECK-NEXT: flat_load_dwordx4 v[28:31], v[96:97] offset:128 +; CHECK-NEXT: flat_load_dwordx4 v[32:35], v[96:97] offset:144 +; CHECK-NEXT: flat_load_dwordx4 v[36:39], v[96:97] offset:96 +; CHECK-NEXT: flat_load_dwordx4 v[48:51], v[96:97] offset:112 +; CHECK-NEXT: flat_load_dwordx4 v[52:55], v[96:97] offset:64 +; CHECK-NEXT: flat_load_dwordx4 v[64:67], v[96:97] offset:80 +; CHECK-NEXT: flat_load_dwordx4 v[68:71], v[96:97] offset:32 +; CHECK-NEXT: flat_load_dwordx4 v[80:83], v[96:97] offset:48 +; CHECK-NEXT: flat_load_dwordx4 v[84:87], v[96:97] +; CHECK-NEXT: flat_load_dwordx4 v[96:99], v[96:97] offset:16 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: s_waitcnt vmcnt(15) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[4:7] offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(14) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[8:11] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(13) lgkmcnt(15) +; 
CHECK-NEXT: flat_store_dwordx4 v[100:101], v[12:15] offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(12) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[16:19] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(11) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[20:23] offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(10) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[24:27] offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(9) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[28:31] offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(8) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[32:35] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(7) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[36:39] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(6) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(5) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(4) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[64:67] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] offset:16 +; CHECK-NEXT: v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; CHECK-NEXT: s_cbranch_vccnz .LBB0_1 +; CHECK-NEXT: ; %bb.2: ; %memcpy-split +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memcpy_p0_p0_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte 
Folded Spill +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: .LBB0_1: ; %load-store-loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v24, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v25, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: flat_load_dwordx4 v[16:19], v[24:25] offset:240 +; ALIGNED-NEXT: flat_load_dwordx4 v[20:23], v[24:25] offset:224 +; ALIGNED-NEXT: flat_load_dwordx4 v[4:7], v[24:25] +; ALIGNED-NEXT: flat_load_dwordx4 v[8:11], v[24:25] offset:16 +; ALIGNED-NEXT: flat_load_dwordx4 v[12:15], v[24:25] offset:32 +; ALIGNED-NEXT: flat_load_dwordx4 v[112:115], v[24:25] offset:48 +; ALIGNED-NEXT: flat_load_dwordx4 v[116:119], v[24:25] offset:64 +; ALIGNED-NEXT: flat_load_dwordx4 v[40:43], v[24:25] offset:80 +; ALIGNED-NEXT: flat_load_dwordx4 v[26:29], v[24:25] offset:96 +; ALIGNED-NEXT: flat_load_dwordx4 v[32:35], v[24:25] offset:112 +; ALIGNED-NEXT: flat_load_dwordx4 v[44:47], v[24:25] offset:128 +; ALIGNED-NEXT: flat_load_dwordx4 v[52:55], v[24:25] offset:144 +; ALIGNED-NEXT: flat_load_dwordx4 v[66:69], v[24:25] offset:160 +; ALIGNED-NEXT: flat_load_dwordx4 v[81:84], v[24:25] offset:176 +; ALIGNED-NEXT: flat_load_dwordx4 v[96:99], v[24:25] offset:192 +; ALIGNED-NEXT: flat_load_dwordx4 v[100:103], v[24:25] offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) lgkmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v16, off, 
s[0:3], s32 offset:64 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v31 offset:254 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:252 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v30 offset:250 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:248 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v25 offset:246 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:244 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v24 offset:242 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:240 +; ALIGNED-NEXT: s_waitcnt lgkmcnt(22) +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: v_cmp_gt_u64_e64 
s6, 0x800, s[4:5] +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v51 offset:238 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:236 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v50 offset:234 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:232 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v49 offset:230 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:228 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v36 offset:226 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:224 +; ALIGNED-NEXT: s_waitcnt lgkmcnt(16) +; ALIGNED-NEXT: buffer_store_dword v100, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_store_dword v102, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_store_dword v103, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v30 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 8, v30 +; ALIGNED-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v71 offset:222 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:220 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v70 offset:218 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:216 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v65 offset:214 +; ALIGNED-NEXT: flat_store_byte v[16:17], v65 offset:212 +; ALIGNED-NEXT: 
s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v64 offset:210 +; ALIGNED-NEXT: flat_store_byte v[16:17], v64 offset:208 +; ALIGNED-NEXT: buffer_store_dword v96, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: buffer_store_dword v97, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_store_dword v98, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_store_dword v99, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v87 offset:206 +; ALIGNED-NEXT: flat_store_byte v[16:17], v87 offset:204 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v86 offset:202 +; ALIGNED-NEXT: flat_store_byte v[16:17], v86 offset:200 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v85 offset:198 +; ALIGNED-NEXT: flat_store_byte v[16:17], v85 offset:196 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v80 offset:194 +; ALIGNED-NEXT: flat_store_byte v[16:17], v80 offset:192 +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v101, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_load_dword v99, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_load_dword v96, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: 
flat_store_byte_d16_hi v[16:17], v101 offset:190 +; ALIGNED-NEXT: flat_store_byte v[16:17], v101 offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v99 offset:186 +; ALIGNED-NEXT: flat_store_byte v[16:17], v99 offset:184 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v96 offset:182 +; ALIGNED-NEXT: flat_store_byte v[16:17], v96 offset:180 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v81 offset:178 +; ALIGNED-NEXT: flat_store_byte v[16:17], v81 offset:176 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v100, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_load_dword v97, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v100 offset:174 +; ALIGNED-NEXT: flat_store_byte v[16:17], v100 offset:172 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v97 offset:170 +; ALIGNED-NEXT: flat_store_byte v[16:17], v97 offset:168 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v82 offset:166 +; ALIGNED-NEXT: flat_store_byte v[16:17], v82 offset:164 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v66 offset:162 +; ALIGNED-NEXT: flat_store_byte v[16:17], v66 offset:160 +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 
offset:104 +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v98, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v98 offset:158 +; ALIGNED-NEXT: flat_store_byte v[16:17], v98 offset:156 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v83 offset:154 +; ALIGNED-NEXT: flat_store_byte v[16:17], v83 offset:152 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v67 offset:150 +; ALIGNED-NEXT: flat_store_byte v[16:17], v67 offset:148 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v52 offset:146 +; ALIGNED-NEXT: flat_store_byte v[16:17], v52 offset:144 +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v84 offset:142 +; ALIGNED-NEXT: flat_store_byte v[16:17], v84 offset:140 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v68 offset:138 +; ALIGNED-NEXT: flat_store_byte v[16:17], v68 offset:136 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], 
v53 offset:134 +; ALIGNED-NEXT: flat_store_byte v[16:17], v53 offset:132 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v37 offset:130 +; ALIGNED-NEXT: flat_store_byte v[16:17], v37 offset:128 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v69 offset:126 +; ALIGNED-NEXT: flat_store_byte v[16:17], v69 offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v54 offset:122 +; ALIGNED-NEXT: flat_store_byte v[16:17], v54 offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v38 offset:118 +; ALIGNED-NEXT: flat_store_byte v[16:17], v38 offset:116 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v32 offset:114 +; ALIGNED-NEXT: flat_store_byte v[16:17], v32 offset:112 +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_load_dword v26, 
off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v55 offset:110 +; ALIGNED-NEXT: flat_store_byte v[16:17], v55 offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v39 offset:106 +; ALIGNED-NEXT: flat_store_byte v[16:17], v39 offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v33 offset:102 +; ALIGNED-NEXT: flat_store_byte v[16:17], v33 offset:100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v26 offset:98 +; ALIGNED-NEXT: flat_store_byte v[16:17], v26 offset:96 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v48 offset:94 +; ALIGNED-NEXT: flat_store_byte v[16:17], v48 offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v34 offset:90 +; ALIGNED-NEXT: flat_store_byte v[16:17], v34 offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v27 offset:86 +; ALIGNED-NEXT: flat_store_byte v[16:17], v27 offset:84 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v21 offset:82 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:80 +; ALIGNED-NEXT: buffer_store_dword v116, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: buffer_store_dword v117, off, s[0:3], s32 
offset:180 +; ALIGNED-NEXT: buffer_store_dword v118, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_store_dword v119, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v35 offset:78 +; ALIGNED-NEXT: flat_store_byte v[16:17], v35 offset:76 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v28 offset:74 +; ALIGNED-NEXT: flat_store_byte v[16:17], v28 offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v22 offset:70 +; ALIGNED-NEXT: flat_store_byte v[16:17], v22 offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v19 offset:66 +; ALIGNED-NEXT: flat_store_byte v[16:17], v19 offset:64 +; ALIGNED-NEXT: buffer_store_dword v112, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: buffer_store_dword v113, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_store_dword v114, off, s[0:3], s32 offset:264 +; ALIGNED-NEXT: buffer_store_dword v115, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:264 +; ALIGNED-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 8, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v50 
+; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 8, v50 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v29 offset:62 +; ALIGNED-NEXT: flat_store_byte v[16:17], v29 offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v23 offset:58 +; ALIGNED-NEXT: flat_store_byte v[16:17], v23 offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v20 offset:54 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v18 offset:50 +; ALIGNED-NEXT: flat_store_byte v[16:17], v18 offset:48 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v15 offset:42 +; ALIGNED-NEXT: flat_store_byte v[16:17], v15 offset:40 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v14 offset:46 +; ALIGNED-NEXT: flat_store_byte v[16:17], v14 offset:44 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v13 offset:34 +; ALIGNED-NEXT: flat_store_byte v[16:17], v13 offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v12 offset:38 +; ALIGNED-NEXT: flat_store_byte v[16:17], v12 offset:36 +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: buffer_store_dword v9, off, 
s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v11 offset:30 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v10 offset:26 +; ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v9 offset:22 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v8 offset:18 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:16 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v65 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v49 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v49 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:253 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v31, 24, v36 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 8, v36 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:251 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v71 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v71 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v65 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v64 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 8, v64 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v67 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v67 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:243 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v87 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v87 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 8, v86 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:239 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v85 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v85 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:237 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 8, v80 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:235 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v101 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v99 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v96 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 8, v96 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:229 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v81 +; ALIGNED-NEXT: 
flat_store_byte v[16:17], v31 offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v100 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 8, v97 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v82 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 8, v82 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 8, v66 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v98 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v98 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:217 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v83 +; ALIGNED-NEXT: flat_store_byte v[16:17], v65 offset:213 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 8, v52 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:211 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v84 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 8, v84 +; ALIGNED-NEXT: flat_store_byte v[16:17], v64 offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v68 +; ALIGNED-NEXT: flat_store_byte v[16:17], v67 offset:149 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 24, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 8, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v68 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:207 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v53 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 8, v53 +; ALIGNED-NEXT: flat_store_byte v[16:17], v87 offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 24, v37 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v37 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:203 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v69 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v69 +; ALIGNED-NEXT: 
flat_store_byte v[16:17], v86 offset:201 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v54 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v54 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 8, v38 +; ALIGNED-NEXT: flat_store_byte v[16:17], v85 offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 24, v32 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 8, v32 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 8, v55 +; ALIGNED-NEXT: flat_store_byte v[16:17], v80 offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v39 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v39 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:191 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v33 +; ALIGNED-NEXT: flat_store_byte v[16:17], v101 offset:189 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v26 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v26, 8, v26 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:187 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 8, v48 +; ALIGNED-NEXT: flat_store_byte v[16:17], v99 offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v34 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 8, v34 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v27, 8, v27 +; ALIGNED-NEXT: flat_store_byte v[16:17], v96 offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 8, v21 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:179 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v35 +; ALIGNED-NEXT: flat_store_byte v[16:17], v81 offset:177 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 8, v28 +; ALIGNED-NEXT: 
flat_store_byte v[16:17], v31 offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 8, v22 +; ALIGNED-NEXT: flat_store_byte v[16:17], v100 offset:173 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 8, v19 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v29 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v29 +; ALIGNED-NEXT: flat_store_byte v[16:17], v97 offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 24, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v23, 8, v23 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:167 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 8, v20 +; ALIGNED-NEXT: flat_store_byte v[16:17], v82 offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 8, v18 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:163 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v15, 8, v15 +; ALIGNED-NEXT: flat_store_byte v[16:17], v66 offset:161 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v14 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v14, 8, v14 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:159 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v13 +; ALIGNED-NEXT: flat_store_byte v[16:17], v98 offset:157 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v12, 8, v12 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:155 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 8, v11 +; ALIGNED-NEXT: flat_store_byte v[16:17], v83 offset:153 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 24, v10 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 8, v10 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:151 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v9 +; ALIGNED-NEXT: 
flat_store_byte v[16:17], v65 offset:147 +; ALIGNED-NEXT: flat_store_byte v[16:17], v52 offset:145 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:143 +; ALIGNED-NEXT: flat_store_byte v[16:17], v84 offset:141 +; ALIGNED-NEXT: flat_store_byte v[16:17], v64 offset:139 +; ALIGNED-NEXT: flat_store_byte v[16:17], v68 offset:137 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:135 +; ALIGNED-NEXT: flat_store_byte v[16:17], v53 offset:133 +; ALIGNED-NEXT: flat_store_byte v[16:17], v87 offset:131 +; ALIGNED-NEXT: flat_store_byte v[16:17], v37 offset:129 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:127 +; ALIGNED-NEXT: flat_store_byte v[16:17], v69 offset:125 +; ALIGNED-NEXT: flat_store_byte v[16:17], v86 offset:123 +; ALIGNED-NEXT: flat_store_byte v[16:17], v54 offset:121 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:119 +; ALIGNED-NEXT: flat_store_byte v[16:17], v38 offset:117 +; ALIGNED-NEXT: flat_store_byte v[16:17], v85 offset:115 +; ALIGNED-NEXT: flat_store_byte v[16:17], v32 offset:113 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:111 +; ALIGNED-NEXT: flat_store_byte v[16:17], v55 offset:109 +; ALIGNED-NEXT: flat_store_byte v[16:17], v80 offset:107 +; ALIGNED-NEXT: flat_store_byte v[16:17], v39 offset:105 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:103 +; ALIGNED-NEXT: flat_store_byte v[16:17], v33 offset:101 +; ALIGNED-NEXT: flat_store_byte v[16:17], v101 offset:99 +; ALIGNED-NEXT: flat_store_byte v[16:17], v26 offset:97 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:95 +; ALIGNED-NEXT: flat_store_byte v[16:17], v48 offset:93 +; ALIGNED-NEXT: flat_store_byte v[16:17], v99 offset:91 +; ALIGNED-NEXT: flat_store_byte v[16:17], v34 offset:89 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:87 +; ALIGNED-NEXT: flat_store_byte v[16:17], v27 offset:85 +; ALIGNED-NEXT: flat_store_byte v[16:17], v96 offset:83 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:81 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 
offset:79 +; ALIGNED-NEXT: flat_store_byte v[16:17], v35 offset:77 +; ALIGNED-NEXT: flat_store_byte v[16:17], v81 offset:75 +; ALIGNED-NEXT: flat_store_byte v[16:17], v28 offset:73 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:71 +; ALIGNED-NEXT: flat_store_byte v[16:17], v22 offset:69 +; ALIGNED-NEXT: flat_store_byte v[16:17], v100 offset:67 +; ALIGNED-NEXT: flat_store_byte v[16:17], v19 offset:65 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:63 +; ALIGNED-NEXT: flat_store_byte v[16:17], v29 offset:61 +; ALIGNED-NEXT: flat_store_byte v[16:17], v97 offset:59 +; ALIGNED-NEXT: flat_store_byte v[16:17], v23 offset:57 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:55 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:53 +; ALIGNED-NEXT: flat_store_byte v[16:17], v82 offset:51 +; ALIGNED-NEXT: flat_store_byte v[16:17], v18 offset:49 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:43 +; ALIGNED-NEXT: flat_store_byte v[16:17], v15 offset:41 +; ALIGNED-NEXT: flat_store_byte v[16:17], v66 offset:47 +; ALIGNED-NEXT: flat_store_byte v[16:17], v14 offset:45 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:35 +; ALIGNED-NEXT: flat_store_byte v[16:17], v13 offset:33 +; ALIGNED-NEXT: flat_store_byte v[16:17], v98 offset:39 +; ALIGNED-NEXT: flat_store_byte v[16:17], v12 offset:37 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:31 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:29 +; ALIGNED-NEXT: flat_store_byte v[16:17], v83 offset:27 +; ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:25 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:23 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:21 +; ALIGNED-NEXT: flat_store_byte v[16:17], v67 offset:19 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v7 offset:14 +; ALIGNED-NEXT: flat_store_byte v[16:17], v7 offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: 
flat_store_byte_d16_hi v[16:17], v6 offset:10 +; ALIGNED-NEXT: flat_store_byte v[16:17], v6 offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v5 offset:6 +; ALIGNED-NEXT: flat_store_byte v[16:17], v5 offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v4 offset:2 +; ALIGNED-NEXT: flat_store_byte v[16:17], v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 24, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v7, 8, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v6, 8, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 24, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v4, 8, v4 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:15 +; ALIGNED-NEXT: flat_store_byte v[16:17], v7 offset:13 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:11 +; ALIGNED-NEXT: flat_store_byte v[16:17], v6 offset:9 +; ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:7 +; ALIGNED-NEXT: flat_store_byte v[16:17], v5 offset:5 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:3 +; ALIGNED-NEXT: flat_store_byte v[16:17], v4 offset:1 +; ALIGNED-NEXT: s_cbranch_vccnz .LBB0_1 +; ALIGNED-NEXT: ; %bb.2: ; %memcpy-split +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 +; ALIGNED-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memcpy_p0_p0_sz2048: +; UNROLL3: ; %bb.0: ; 
%entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB0_1: ; %load-store-loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: flat_load_dwordx4 v[4:7], v[12:13] +; UNROLL3-NEXT: flat_load_dwordx4 v[8:11], v[12:13] offset:16 +; UNROLL3-NEXT: flat_load_dwordx4 v[12:15], v[12:13] offset:32 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[4:7] +; UNROLL3-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[8:11] offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[12:15] offset:32 +; UNROLL3-NEXT: v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5] +; UNROLL3-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; UNROLL3-NEXT: s_cbranch_vccnz .LBB0_1 +; UNROLL3-NEXT: ; %bb.2: ; %memcpy-split +; UNROLL3-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:2016 +; UNROLL3-NEXT: flat_load_dwordx4 v[2:5], v[2:3] offset:2032 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:2032 +; UNROLL3-NEXT: s_waitcnt lgkmcnt(0) +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + +define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { +; CHECK-LABEL: memcpy_p1_p1_sz2048: +; CHECK: 
; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: .LBB1_1: ; %load-store-loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: s_clause 0xf +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[96:97], off offset:224 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[96:97], off offset:240 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v[96:97], off offset:192 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v[96:97], off offset:208 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v[96:97], off offset:160 +; CHECK-NEXT: global_load_dwordx4 v[24:27], v[96:97], off offset:176 +; CHECK-NEXT: global_load_dwordx4 v[28:31], v[96:97], off offset:128 +; CHECK-NEXT: global_load_dwordx4 v[32:35], v[96:97], off offset:144 +; CHECK-NEXT: global_load_dwordx4 v[36:39], v[96:97], off offset:96 +; CHECK-NEXT: global_load_dwordx4 v[48:51], v[96:97], off offset:112 +; CHECK-NEXT: global_load_dwordx4 v[52:55], v[96:97], off offset:64 +; CHECK-NEXT: global_load_dwordx4 v[64:67], v[96:97], off offset:80 +; CHECK-NEXT: global_load_dwordx4 v[68:71], v[96:97], off offset:32 +; CHECK-NEXT: global_load_dwordx4 v[80:83], v[96:97], off offset:48 +; CHECK-NEXT: global_load_dwordx4 v[84:87], v[96:97], off +; CHECK-NEXT: global_load_dwordx4 v[96:99], v[96:97], off offset:16 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[4:7], off offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[8:11], off offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[12:15], off offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: 
global_store_dwordx4 v[100:101], v[16:19], off offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[20:23], off offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[24:27], off offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[28:31], off offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[32:35], off offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[36:39], off offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[48:51], off offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[52:55], off offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[64:67], off offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[68:71], off offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[80:83], off offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[84:87], off +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16 +; CHECK-NEXT: v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; CHECK-NEXT: s_cbranch_vccnz .LBB1_1 +; CHECK-NEXT: ; %bb.2: ; %memcpy-split +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memcpy_p1_p1_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: .LBB1_1: ; %load-store-loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v24, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v25, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: global_load_dwordx4 v[16:19], v[24:25], off offset:240 +; ALIGNED-NEXT: global_load_dwordx4 v[20:23], v[24:25], off offset:224 +; ALIGNED-NEXT: global_load_dwordx4 v[4:7], v[24:25], off +; ALIGNED-NEXT: global_load_dwordx4 v[8:11], v[24:25], off offset:16 +; ALIGNED-NEXT: global_load_dwordx4 v[12:15], v[24:25], off offset:32 +; ALIGNED-NEXT: global_load_dwordx4 v[112:115], v[24:25], off offset:48 +; ALIGNED-NEXT: global_load_dwordx4 v[116:119], v[24:25], off offset:64 +; ALIGNED-NEXT: global_load_dwordx4 v[40:43], v[24:25], off offset:80 +; ALIGNED-NEXT: global_load_dwordx4 v[26:29], v[24:25], off offset:96 +; ALIGNED-NEXT: global_load_dwordx4 v[32:35], v[24:25], off offset:112 +; ALIGNED-NEXT: global_load_dwordx4 v[44:47], v[24:25], off offset:128 +; ALIGNED-NEXT: global_load_dwordx4 v[52:55], v[24:25], off offset:144 +; ALIGNED-NEXT: global_load_dwordx4 v[66:69], v[24:25], off offset:160 +; ALIGNED-NEXT: global_load_dwordx4 v[81:84], v[24:25], off offset:176 +; ALIGNED-NEXT: global_load_dwordx4 v[96:99], v[24:25], off offset:192 +; ALIGNED-NEXT: global_load_dwordx4 v[100:103], v[24:25], off offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: 
buffer_store_dword v18, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v31, off offset:254 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:252 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v30, off offset:250 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:248 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v25, off offset:246 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:244 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v24, off offset:242 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:240 +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: 
global_store_byte_d16_hi v[16:17], v51, off offset:238 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:236 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v50, off offset:234 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:232 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v49, off offset:230 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:228 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v36, off offset:226 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:224 +; ALIGNED-NEXT: buffer_store_dword v100, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_store_dword v102, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_store_dword v103, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v30 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 8, v30 +; ALIGNED-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v71, off offset:222 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:220 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v70, off offset:218 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:216 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v65, off offset:214 +; ALIGNED-NEXT: global_store_byte v[16:17], v65, off offset:212 +; ALIGNED-NEXT: s_waitcnt 
vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v64, off offset:210 +; ALIGNED-NEXT: global_store_byte v[16:17], v64, off offset:208 +; ALIGNED-NEXT: buffer_store_dword v96, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: buffer_store_dword v97, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_store_dword v98, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_store_dword v99, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v87, off offset:206 +; ALIGNED-NEXT: global_store_byte v[16:17], v87, off offset:204 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v86, off offset:202 +; ALIGNED-NEXT: global_store_byte v[16:17], v86, off offset:200 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v85, off offset:198 +; ALIGNED-NEXT: global_store_byte v[16:17], v85, off offset:196 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v80, off offset:194 +; ALIGNED-NEXT: global_store_byte v[16:17], v80, off offset:192 +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v101, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_load_dword v99, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_load_dword v96, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:128 +; 
ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v101, off offset:190 +; ALIGNED-NEXT: global_store_byte v[16:17], v101, off offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v99, off offset:186 +; ALIGNED-NEXT: global_store_byte v[16:17], v99, off offset:184 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v96, off offset:182 +; ALIGNED-NEXT: global_store_byte v[16:17], v96, off offset:180 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v81, off offset:178 +; ALIGNED-NEXT: global_store_byte v[16:17], v81, off offset:176 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v100, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_load_dword v97, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v100, off offset:174 +; ALIGNED-NEXT: global_store_byte v[16:17], v100, off offset:172 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v97, off offset:170 +; ALIGNED-NEXT: global_store_byte v[16:17], v97, off offset:168 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v82, off offset:166 +; ALIGNED-NEXT: global_store_byte v[16:17], v82, off offset:164 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v66, off offset:162 +; ALIGNED-NEXT: global_store_byte v[16:17], v66, off offset:160 +; ALIGNED-NEXT: buffer_store_dword 
v52, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v98, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v98, off offset:158 +; ALIGNED-NEXT: global_store_byte v[16:17], v98, off offset:156 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v83, off offset:154 +; ALIGNED-NEXT: global_store_byte v[16:17], v83, off offset:152 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v67, off offset:150 +; ALIGNED-NEXT: global_store_byte v[16:17], v67, off offset:148 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v52, off offset:146 +; ALIGNED-NEXT: global_store_byte v[16:17], v52, off offset:144 +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v84, off offset:142 +; ALIGNED-NEXT: global_store_byte v[16:17], v84, off offset:140 +; ALIGNED-NEXT: 
s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v68, off offset:138 +; ALIGNED-NEXT: global_store_byte v[16:17], v68, off offset:136 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v53, off offset:134 +; ALIGNED-NEXT: global_store_byte v[16:17], v53, off offset:132 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v37, off offset:130 +; ALIGNED-NEXT: global_store_byte v[16:17], v37, off offset:128 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v69, off offset:126 +; ALIGNED-NEXT: global_store_byte v[16:17], v69, off offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v54, off offset:122 +; ALIGNED-NEXT: global_store_byte v[16:17], v54, off offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v38, off offset:118 +; ALIGNED-NEXT: global_store_byte v[16:17], v38, off offset:116 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v32, off offset:114 +; ALIGNED-NEXT: global_store_byte v[16:17], v32, off offset:112 +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: 
buffer_store_dword v29, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v55, off offset:110 +; ALIGNED-NEXT: global_store_byte v[16:17], v55, off offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v39, off offset:106 +; ALIGNED-NEXT: global_store_byte v[16:17], v39, off offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v33, off offset:102 +; ALIGNED-NEXT: global_store_byte v[16:17], v33, off offset:100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v26, off offset:98 +; ALIGNED-NEXT: global_store_byte v[16:17], v26, off offset:96 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v48, off offset:94 +; ALIGNED-NEXT: global_store_byte v[16:17], v48, off offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v34, off offset:90 +; ALIGNED-NEXT: global_store_byte v[16:17], v34, off offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) 
+; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v27, off offset:86 +; ALIGNED-NEXT: global_store_byte v[16:17], v27, off offset:84 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v21, off offset:82 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:80 +; ALIGNED-NEXT: buffer_store_dword v116, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: buffer_store_dword v117, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_store_dword v118, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_store_dword v119, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v35, off offset:78 +; ALIGNED-NEXT: global_store_byte v[16:17], v35, off offset:76 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v28, off offset:74 +; ALIGNED-NEXT: global_store_byte v[16:17], v28, off offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v22, off offset:70 +; ALIGNED-NEXT: global_store_byte v[16:17], v22, off offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v19, off offset:66 +; ALIGNED-NEXT: global_store_byte v[16:17], v19, off offset:64 +; ALIGNED-NEXT: buffer_store_dword v112, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: buffer_store_dword v113, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_store_dword v114, off, s[0:3], s32 offset:264 +; ALIGNED-NEXT: buffer_store_dword v115, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: buffer_load_dword v23, off, 
s[0:3], s32 offset:264 +; ALIGNED-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 8, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v50 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 8, v50 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v29, off offset:62 +; ALIGNED-NEXT: global_store_byte v[16:17], v29, off offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v23, off offset:58 +; ALIGNED-NEXT: global_store_byte v[16:17], v23, off offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v20, off offset:54 +; ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v18, off offset:50 +; ALIGNED-NEXT: global_store_byte v[16:17], v18, off offset:48 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v15, off offset:42 +; ALIGNED-NEXT: global_store_byte v[16:17], v15, off offset:40 +; ALIGNED-NEXT: 
s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v14, off offset:46 +; ALIGNED-NEXT: global_store_byte v[16:17], v14, off offset:44 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v13, off offset:34 +; ALIGNED-NEXT: global_store_byte v[16:17], v13, off offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v12, off offset:38 +; ALIGNED-NEXT: global_store_byte v[16:17], v12, off offset:36 +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v11, off offset:30 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v10, off offset:26 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v9, off offset:22 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v8, off offset:18 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:16 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_store_dword v7, off, 
s[0:3], s32 offset:252 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v65 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v49 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v49 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:253 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v36 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 8, v36 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:251 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v71 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v71 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v65 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v64 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 8, v64 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v67 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v67 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:243 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v87 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v87 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 8, v86 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:239 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v85 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v85 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:237 
+; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 8, v80 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:235 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v101 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v99 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v96 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 8, v96 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:229 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v81 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v100 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 8, v97 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v82 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 8, v82 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 8, v66 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v98 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v98 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:217 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v83 +; ALIGNED-NEXT: global_store_byte v[16:17], v65, off offset:213 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 8, v52 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:211 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v84 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v84, 8, v84 +; ALIGNED-NEXT: global_store_byte v[16:17], v64, off offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v68 +; ALIGNED-NEXT: global_store_byte v[16:17], v67, off offset:149 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 24, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 8, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v68 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:207 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v53 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 8, v53 +; ALIGNED-NEXT: global_store_byte v[16:17], v87, off offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 24, v37 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v37 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:203 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v69 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v69 +; ALIGNED-NEXT: global_store_byte v[16:17], v86, off offset:201 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v54 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v54 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 8, v38 +; ALIGNED-NEXT: global_store_byte v[16:17], v85, off offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 24, v32 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 8, v32 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 8, v55 +; ALIGNED-NEXT: global_store_byte v[16:17], v80, off offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v39 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v39 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:191 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v33 +; ALIGNED-NEXT: global_store_byte v[16:17], v101, off offset:189 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v26 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v26, 8, v26 +; ALIGNED-NEXT: global_store_byte v[16:17], 
v50, off offset:187 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 8, v48 +; ALIGNED-NEXT: global_store_byte v[16:17], v99, off offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v34 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 8, v34 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v27, 8, v27 +; ALIGNED-NEXT: global_store_byte v[16:17], v96, off offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 8, v21 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:179 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v35 +; ALIGNED-NEXT: global_store_byte v[16:17], v81, off offset:177 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 8, v28 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 8, v22 +; ALIGNED-NEXT: global_store_byte v[16:17], v100, off offset:173 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 8, v19 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v29 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v29 +; ALIGNED-NEXT: global_store_byte v[16:17], v97, off offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 24, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v23, 8, v23 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:167 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 8, v20 +; ALIGNED-NEXT: global_store_byte v[16:17], v82, off offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 8, v18 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:163 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v15 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v15, 8, v15 +; ALIGNED-NEXT: global_store_byte v[16:17], v66, off offset:161 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v14 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v14, 8, v14 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:159 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v13 +; ALIGNED-NEXT: global_store_byte v[16:17], v98, off offset:157 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v12, 8, v12 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:155 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 8, v11 +; ALIGNED-NEXT: global_store_byte v[16:17], v83, off offset:153 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 24, v10 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 8, v10 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:151 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v9 +; ALIGNED-NEXT: global_store_byte v[16:17], v65, off offset:147 +; ALIGNED-NEXT: global_store_byte v[16:17], v52, off offset:145 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:143 +; ALIGNED-NEXT: global_store_byte v[16:17], v84, off offset:141 +; ALIGNED-NEXT: global_store_byte v[16:17], v64, off offset:139 +; ALIGNED-NEXT: global_store_byte v[16:17], v68, off offset:137 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:135 +; ALIGNED-NEXT: global_store_byte v[16:17], v53, off offset:133 +; ALIGNED-NEXT: global_store_byte v[16:17], v87, off offset:131 +; ALIGNED-NEXT: global_store_byte v[16:17], v37, off offset:129 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:127 +; ALIGNED-NEXT: global_store_byte v[16:17], v69, off offset:125 +; ALIGNED-NEXT: global_store_byte v[16:17], v86, off offset:123 +; ALIGNED-NEXT: global_store_byte v[16:17], v54, off offset:121 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:119 +; ALIGNED-NEXT: 
global_store_byte v[16:17], v38, off offset:117 +; ALIGNED-NEXT: global_store_byte v[16:17], v85, off offset:115 +; ALIGNED-NEXT: global_store_byte v[16:17], v32, off offset:113 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:111 +; ALIGNED-NEXT: global_store_byte v[16:17], v55, off offset:109 +; ALIGNED-NEXT: global_store_byte v[16:17], v80, off offset:107 +; ALIGNED-NEXT: global_store_byte v[16:17], v39, off offset:105 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:103 +; ALIGNED-NEXT: global_store_byte v[16:17], v33, off offset:101 +; ALIGNED-NEXT: global_store_byte v[16:17], v101, off offset:99 +; ALIGNED-NEXT: global_store_byte v[16:17], v26, off offset:97 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:95 +; ALIGNED-NEXT: global_store_byte v[16:17], v48, off offset:93 +; ALIGNED-NEXT: global_store_byte v[16:17], v99, off offset:91 +; ALIGNED-NEXT: global_store_byte v[16:17], v34, off offset:89 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:87 +; ALIGNED-NEXT: global_store_byte v[16:17], v27, off offset:85 +; ALIGNED-NEXT: global_store_byte v[16:17], v96, off offset:83 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:81 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:79 +; ALIGNED-NEXT: global_store_byte v[16:17], v35, off offset:77 +; ALIGNED-NEXT: global_store_byte v[16:17], v81, off offset:75 +; ALIGNED-NEXT: global_store_byte v[16:17], v28, off offset:73 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:71 +; ALIGNED-NEXT: global_store_byte v[16:17], v22, off offset:69 +; ALIGNED-NEXT: global_store_byte v[16:17], v100, off offset:67 +; ALIGNED-NEXT: global_store_byte v[16:17], v19, off offset:65 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:63 +; ALIGNED-NEXT: global_store_byte v[16:17], v29, off offset:61 +; ALIGNED-NEXT: global_store_byte v[16:17], v97, off offset:59 +; ALIGNED-NEXT: global_store_byte v[16:17], v23, off offset:57 +; ALIGNED-NEXT: 
global_store_byte v[16:17], v103, off offset:55 +; ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:53 +; ALIGNED-NEXT: global_store_byte v[16:17], v82, off offset:51 +; ALIGNED-NEXT: global_store_byte v[16:17], v18, off offset:49 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:43 +; ALIGNED-NEXT: global_store_byte v[16:17], v15, off offset:41 +; ALIGNED-NEXT: global_store_byte v[16:17], v66, off offset:47 +; ALIGNED-NEXT: global_store_byte v[16:17], v14, off offset:45 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:35 +; ALIGNED-NEXT: global_store_byte v[16:17], v13, off offset:33 +; ALIGNED-NEXT: global_store_byte v[16:17], v98, off offset:39 +; ALIGNED-NEXT: global_store_byte v[16:17], v12, off offset:37 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:31 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:29 +; ALIGNED-NEXT: global_store_byte v[16:17], v83, off offset:27 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:25 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:23 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:21 +; ALIGNED-NEXT: global_store_byte v[16:17], v67, off offset:19 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v7, off offset:14 +; ALIGNED-NEXT: global_store_byte v[16:17], v7, off offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v6, off offset:10 +; ALIGNED-NEXT: global_store_byte v[16:17], v6, off offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v5, off offset:6 +; ALIGNED-NEXT: global_store_byte v[16:17], v5, off offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v4, off offset:2 +; ALIGNED-NEXT: global_store_byte v[16:17], v4, off +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 24, v7 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v7, 8, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v6, 8, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 24, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v4, 8, v4 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:15 +; ALIGNED-NEXT: global_store_byte v[16:17], v7, off offset:13 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:11 +; ALIGNED-NEXT: global_store_byte v[16:17], v6, off offset:9 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:7 +; ALIGNED-NEXT: global_store_byte v[16:17], v5, off offset:5 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3 +; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1 +; ALIGNED-NEXT: s_cbranch_vccnz .LBB1_1 +; ALIGNED-NEXT: ; %bb.2: ; %memcpy-split +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 +; ALIGNED-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memcpy_p1_p1_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB1_1: ; %load-store-loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: s_add_u32 
s4, s4, 48 +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[12:13], off +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:16 +; UNROLL3-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:32 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[4:7], off +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[8:11], off offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:32 +; UNROLL3-NEXT: v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5] +; UNROLL3-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; UNROLL3-NEXT: s_cbranch_vccnz .LBB1_1 +; UNROLL3-NEXT: ; %bb.2: ; %memcpy-split +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2016 +; UNROLL3-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:2032 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:2032 +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + +define void @memcpy_p0_p4_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { +; CHECK-LABEL: memcpy_p0_p4_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: .LBB2_1: ; %load-store-loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: s_clause 0xf +; 
CHECK-NEXT: global_load_dwordx4 v[4:7], v[96:97], off offset:240 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[96:97], off offset:224 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v[96:97], off offset:208 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v[96:97], off offset:192 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v[96:97], off offset:176 +; CHECK-NEXT: global_load_dwordx4 v[24:27], v[96:97], off offset:160 +; CHECK-NEXT: global_load_dwordx4 v[28:31], v[96:97], off offset:144 +; CHECK-NEXT: global_load_dwordx4 v[32:35], v[96:97], off offset:128 +; CHECK-NEXT: global_load_dwordx4 v[36:39], v[96:97], off offset:112 +; CHECK-NEXT: global_load_dwordx4 v[48:51], v[96:97], off offset:96 +; CHECK-NEXT: global_load_dwordx4 v[52:55], v[96:97], off offset:80 +; CHECK-NEXT: global_load_dwordx4 v[64:67], v[96:97], off offset:64 +; CHECK-NEXT: global_load_dwordx4 v[68:71], v[96:97], off offset:48 +; CHECK-NEXT: global_load_dwordx4 v[80:83], v[96:97], off offset:32 +; CHECK-NEXT: global_load_dwordx4 v[84:87], v[96:97], off offset:16 +; CHECK-NEXT: global_load_dwordx4 v[96:99], v[96:97], off +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[4:7] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[8:11] offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[12:15] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[16:19] offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[20:23] offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[24:27] offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[28:31] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[32:35] offset:128 
+; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[36:39] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[64:67] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] +; CHECK-NEXT: v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; CHECK-NEXT: s_cbranch_vccnz .LBB2_1 +; CHECK-NEXT: ; %bb.2: ; %memcpy-split +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memcpy_p0_p4_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: .LBB2_1: ; %load-store-loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v4, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: v_add_co_u32 v96, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: global_load_dwordx4 v[98:101], v[4:5], off offset:240 +; ALIGNED-NEXT: global_load_dwordx4 v[84:87], v[4:5], off offset:224 +; ALIGNED-NEXT: global_load_dwordx4 v[80:83], v[4:5], off offset:208 +; ALIGNED-NEXT: global_load_dwordx4 v[68:71], v[4:5], off offset:192 +; ALIGNED-NEXT: global_load_dwordx4 v[64:67], v[4:5], off offset:176 +; ALIGNED-NEXT: global_load_dwordx4 v[52:55], v[4:5], off offset:160 +; ALIGNED-NEXT: 
global_load_dwordx4 v[48:51], v[4:5], off offset:144 +; ALIGNED-NEXT: global_load_dwordx4 v[36:39], v[4:5], off offset:128 +; ALIGNED-NEXT: global_load_dwordx4 v[32:35], v[4:5], off offset:112 +; ALIGNED-NEXT: global_load_dwordx4 v[28:31], v[4:5], off offset:96 +; ALIGNED-NEXT: global_load_dwordx4 v[24:27], v[4:5], off offset:80 +; ALIGNED-NEXT: global_load_dwordx4 v[20:23], v[4:5], off offset:64 +; ALIGNED-NEXT: global_load_dwordx4 v[16:19], v[4:5], off offset:48 +; ALIGNED-NEXT: global_load_dwordx4 v[12:15], v[4:5], off offset:32 +; ALIGNED-NEXT: global_load_dwordx4 v[8:11], v[4:5], off offset:16 +; ALIGNED-NEXT: global_load_dwordx4 v[4:7], v[4:5], off +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v100, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_store_dword v99, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_store_dword v98, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v100 offset:250 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v101 offset:254 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:252 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:248 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v99 offset:246 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:244 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v98 offset:242 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:240 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v98 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v98 +; ALIGNED-NEXT: s_waitcnt vmcnt(14) +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v114, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v86 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:251 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v87 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v87 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:253 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v84 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v84 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:243 +; ALIGNED-NEXT: s_waitcnt vmcnt(13) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v82 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v82 +; ALIGNED-NEXT: buffer_store_dword v86, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_store_dword v87, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: buffer_store_dword v85, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v86 offset:234 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v87 offset:238 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:236 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:232 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v85 offset:230 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:228 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v84 offset:226 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:224 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v81 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:235 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v80 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v80 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:239 +; ALIGNED-NEXT: s_waitcnt vmcnt(12) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:237 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v71 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:229 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v71 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v69 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v69 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_store_dword v80, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v82 offset:218 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v83 offset:222 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:220 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:216 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v81 offset:214 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:212 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v80 offset:210 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:208 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v68 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v68 +; ALIGNED-NEXT: s_waitcnt vmcnt(11) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v66 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v67 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:217 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v67 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v65 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v65 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v64 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:213 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v64 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:211 +; ALIGNED-NEXT: s_waitcnt vmcnt(10) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v54 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v54 +; ALIGNED-NEXT: buffer_store_dword v70, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_store_dword v71, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v70 offset:202 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v71 offset:206 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:204 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:200 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v69 offset:198 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:196 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v68 offset:194 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:192 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 24, v55 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:203 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v53 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:201 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v53 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:207 +; ALIGNED-NEXT: s_waitcnt vmcnt(9) +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v50 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v50 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v51 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v51 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v49 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v49 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: buffer_store_dword v65, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_store_dword v64, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v66 offset:186 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v67 offset:190 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:188 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:184 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v65 offset:182 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:180 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v64 offset:178 +; ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:176 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v48 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:187 +; ALIGNED-NEXT: s_waitcnt vmcnt(8) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v39 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v38 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v39 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:191 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v37 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:189 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v37 +; 
ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:179 +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v34 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:177 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v34 +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v54 offset:170 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:168 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v55 offset:174 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:172 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v52 offset:162 +; ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:160 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v53 offset:166 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:164 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v116, 8, v33 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v32 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v32 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:173 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 24, v31 +; ALIGNED-NEXT: v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:163 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v31 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:161 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v29 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 24, v30 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v30 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:167 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v29 +; ALIGNED-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v50 offset:154 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v51 offset:158 +; ALIGNED-NEXT: flat_store_byte v[96:97], v51 offset:156 +; ALIGNED-NEXT: flat_store_byte v[96:97], v50 offset:152 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v49 offset:150 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:148 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v48 offset:146 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:144 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v28 +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v26 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:155 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:153 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:159 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:157 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:151 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:149 +; ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:147 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:145 +; ALIGNED-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:212 +; 
ALIGNED-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v38 offset:138 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v39 offset:142 +; ALIGNED-NEXT: flat_store_byte v[96:97], v39 offset:140 +; ALIGNED-NEXT: flat_store_byte v[96:97], v38 offset:136 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v37 offset:134 +; ALIGNED-NEXT: flat_store_byte v[96:97], v37 offset:132 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v36 offset:130 +; ALIGNED-NEXT: flat_store_byte v[96:97], v36 offset:128 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:143 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v26 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:139 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:137 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:141 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:135 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:133 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:131 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:129 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v34 offset:122 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v35 offset:126 +; ALIGNED-NEXT: flat_store_byte v[96:97], v35 offset:124 +; ALIGNED-NEXT: flat_store_byte v[96:97], v34 offset:120 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v33 offset:118 +; ALIGNED-NEXT: flat_store_byte v[96:97], v33 offset:116 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v32 offset:114 +; ALIGNED-NEXT: flat_store_byte v[96:97], v32 offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 24, 
v14 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:123 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:121 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:127 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:125 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:119 +; ALIGNED-NEXT: flat_store_byte v[96:97], v116 offset:117 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:115 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:113 +; ALIGNED-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v30 offset:106 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v31 offset:110 +; ALIGNED-NEXT: flat_store_byte v[96:97], v31 offset:108 +; ALIGNED-NEXT: flat_store_byte v[96:97], v30 offset:104 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v29 offset:102 +; ALIGNED-NEXT: flat_store_byte v[96:97], v29 offset:100 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v28 offset:98 +; ALIGNED-NEXT: flat_store_byte v[96:97], v28 offset:96 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:111 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v14 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:109 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v6 +; ALIGNED-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v15 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 24, v11 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:103 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v17 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v17 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 24, v16 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v16 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 8, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v9 +; ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:107 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 24, v8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:105 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v7 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 24, v5 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v5 +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_store_dword v25, off, s[0:3], 
s32 offset:4 +; ALIGNED-NEXT: buffer_store_dword v24, off, s[0:3], s32 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v26 offset:90 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v27 offset:94 +; ALIGNED-NEXT: flat_store_byte v[96:97], v27 offset:92 +; ALIGNED-NEXT: flat_store_byte v[96:97], v26 offset:88 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v25 offset:86 +; ALIGNED-NEXT: flat_store_byte v[96:97], v25 offset:84 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v24 offset:82 +; ALIGNED-NEXT: flat_store_byte v[96:97], v24 offset:80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 8, v4 +; ALIGNED-NEXT: flat_store_byte v[96:97], v50 offset:91 +; ALIGNED-NEXT: flat_store_byte v[96:97], v51 offset:89 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:95 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:93 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:87 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:83 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:81 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v22 offset:74 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v23 offset:78 +; ALIGNED-NEXT: flat_store_byte v[96:97], v23 offset:76 +; ALIGNED-NEXT: flat_store_byte v[96:97], v22 offset:72 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v21 offset:70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v21 offset:68 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v20 offset:66 +; ALIGNED-NEXT: flat_store_byte v[96:97], v20 offset:64 +; ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:75 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:73 +; ALIGNED-NEXT: 
flat_store_byte v[96:97], v36 offset:79 +; ALIGNED-NEXT: flat_store_byte v[96:97], v37 offset:77 +; ALIGNED-NEXT: flat_store_byte v[96:97], v38 offset:71 +; ALIGNED-NEXT: flat_store_byte v[96:97], v39 offset:69 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:67 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:65 +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:59 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v18 offset:58 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:57 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v19 offset:62 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:63 +; ALIGNED-NEXT: flat_store_byte v[96:97], v19 offset:60 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:61 +; ALIGNED-NEXT: flat_store_byte v[96:97], v18 offset:56 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:55 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v17 offset:54 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:53 +; ALIGNED-NEXT: flat_store_byte v[96:97], v17 offset:52 +; ALIGNED-NEXT: flat_store_byte v[96:97], v32 offset:51 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v16 offset:50 +; ALIGNED-NEXT: flat_store_byte v[96:97], v33 offset:49 +; ALIGNED-NEXT: flat_store_byte v[96:97], v16 offset:48 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v14 offset:42 +; ALIGNED-NEXT: flat_store_byte v[96:97], v34 offset:43 +; ALIGNED-NEXT: flat_store_byte v[96:97], v35 offset:41 +; 
ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v15 offset:46 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:47 +; ALIGNED-NEXT: flat_store_byte v[96:97], v15 offset:44 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:45 +; ALIGNED-NEXT: flat_store_byte v[96:97], v14 offset:40 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:39 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v13 offset:38 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:37 +; ALIGNED-NEXT: flat_store_byte v[96:97], v13 offset:36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:35 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v12 offset:34 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:33 +; ALIGNED-NEXT: flat_store_byte v[96:97], v12 offset:32 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v10 offset:26 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:27 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:25 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v11 offset:30 +; ALIGNED-NEXT: flat_store_byte v[96:97], v28 offset:31 +; ALIGNED-NEXT: flat_store_byte v[96:97], v11 offset:28 +; ALIGNED-NEXT: flat_store_byte v[96:97], v29 offset:29 +; ALIGNED-NEXT: flat_store_byte v[96:97], v10 offset:24 +; ALIGNED-NEXT: flat_store_byte v[96:97], v30 offset:23 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v9 offset:22 +; ALIGNED-NEXT: flat_store_byte v[96:97], v31 offset:21 +; ALIGNED-NEXT: flat_store_byte v[96:97], v9 offset:20 +; ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:19 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v8 offset:18 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:17 +; ALIGNED-NEXT: flat_store_byte v[96:97], v8 offset:16 +; ALIGNED-NEXT: 
buffer_store_dword v6, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v6 offset:10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:11 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:9 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v7 offset:14 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:15 +; ALIGNED-NEXT: flat_store_byte v[96:97], v7 offset:12 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:13 +; ALIGNED-NEXT: flat_store_byte v[96:97], v6 offset:8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:7 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v5 offset:6 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:5 +; ALIGNED-NEXT: flat_store_byte v[96:97], v5 offset:4 +; ALIGNED-NEXT: flat_store_byte v[96:97], v24 offset:3 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v4 offset:2 +; ALIGNED-NEXT: flat_store_byte v[96:97], v25 offset:1 +; ALIGNED-NEXT: flat_store_byte v[96:97], v4 +; ALIGNED-NEXT: s_cbranch_vccnz .LBB2_1 +; ALIGNED-NEXT: ; %bb.2: ; %memcpy-split +; ALIGNED-NEXT: s_waitcnt lgkmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memcpy_p0_p4_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB2_1: ; %load-store-loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[12:13], off +; UNROLL3-NEXT: 
global_load_dwordx4 v[12:15], v[12:13], off offset:32 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[4:7] offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[8:11] +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[12:15] offset:32 +; UNROLL3-NEXT: v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5] +; UNROLL3-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; UNROLL3-NEXT: s_cbranch_vccnz .LBB2_1 +; UNROLL3-NEXT: ; %bb.2: ; %memcpy-split +; UNROLL3-NEXT: s_clause 0x1 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:2016 +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:2032 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[8:11] offset:2032 +; UNROLL3-NEXT: s_waitcnt lgkmcnt(0) +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + +define void @memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { +; CHECK-LABEL: memcpy_p5_p5_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: .LBB3_1: ; %load-store-loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_clause 0x3e +; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:248 +; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:244 +; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:240 +; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:236 +; CHECK-NEXT: 
buffer_load_dword v7, v1, s[0:3], 0 offen offset:232 +; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:228 +; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:224 +; CHECK-NEXT: buffer_load_dword v10, v1, s[0:3], 0 offen offset:220 +; CHECK-NEXT: buffer_load_dword v11, v1, s[0:3], 0 offen offset:216 +; CHECK-NEXT: buffer_load_dword v12, v1, s[0:3], 0 offen offset:212 +; CHECK-NEXT: buffer_load_dword v13, v1, s[0:3], 0 offen offset:208 +; CHECK-NEXT: buffer_load_dword v14, v1, s[0:3], 0 offen offset:204 +; CHECK-NEXT: buffer_load_dword v15, v1, s[0:3], 0 offen offset:200 +; CHECK-NEXT: buffer_load_dword v16, v1, s[0:3], 0 offen offset:196 +; CHECK-NEXT: buffer_load_dword v17, v1, s[0:3], 0 offen offset:192 +; CHECK-NEXT: buffer_load_dword v18, v1, s[0:3], 0 offen offset:188 +; CHECK-NEXT: buffer_load_dword v19, v1, s[0:3], 0 offen offset:184 +; CHECK-NEXT: buffer_load_dword v20, v1, s[0:3], 0 offen offset:180 +; CHECK-NEXT: buffer_load_dword v21, v1, s[0:3], 0 offen offset:176 +; CHECK-NEXT: buffer_load_dword v22, v1, s[0:3], 0 offen offset:172 +; CHECK-NEXT: buffer_load_dword v23, v1, s[0:3], 0 offen offset:168 +; CHECK-NEXT: buffer_load_dword v24, v1, s[0:3], 0 offen offset:164 +; CHECK-NEXT: buffer_load_dword v25, v1, s[0:3], 0 offen offset:160 +; CHECK-NEXT: buffer_load_dword v26, v1, s[0:3], 0 offen offset:156 +; CHECK-NEXT: buffer_load_dword v27, v1, s[0:3], 0 offen offset:152 +; CHECK-NEXT: buffer_load_dword v28, v1, s[0:3], 0 offen offset:148 +; CHECK-NEXT: buffer_load_dword v29, v1, s[0:3], 0 offen offset:144 +; CHECK-NEXT: buffer_load_dword v30, v1, s[0:3], 0 offen offset:140 +; CHECK-NEXT: buffer_load_dword v31, v1, s[0:3], 0 offen offset:136 +; CHECK-NEXT: buffer_load_dword v32, v1, s[0:3], 0 offen offset:132 +; CHECK-NEXT: buffer_load_dword v33, v1, s[0:3], 0 offen offset:128 +; CHECK-NEXT: buffer_load_dword v34, v1, s[0:3], 0 offen offset:124 +; CHECK-NEXT: buffer_load_dword v35, v1, s[0:3], 0 offen offset:120 +; CHECK-NEXT: 
buffer_load_dword v36, v1, s[0:3], 0 offen offset:116 +; CHECK-NEXT: buffer_load_dword v37, v1, s[0:3], 0 offen offset:112 +; CHECK-NEXT: buffer_load_dword v38, v1, s[0:3], 0 offen offset:108 +; CHECK-NEXT: buffer_load_dword v39, v1, s[0:3], 0 offen offset:104 +; CHECK-NEXT: buffer_load_dword v48, v1, s[0:3], 0 offen offset:100 +; CHECK-NEXT: buffer_load_dword v49, v1, s[0:3], 0 offen offset:96 +; CHECK-NEXT: buffer_load_dword v50, v1, s[0:3], 0 offen offset:92 +; CHECK-NEXT: buffer_load_dword v51, v1, s[0:3], 0 offen offset:88 +; CHECK-NEXT: buffer_load_dword v52, v1, s[0:3], 0 offen offset:84 +; CHECK-NEXT: buffer_load_dword v53, v1, s[0:3], 0 offen offset:80 +; CHECK-NEXT: buffer_load_dword v54, v1, s[0:3], 0 offen offset:76 +; CHECK-NEXT: buffer_load_dword v55, v1, s[0:3], 0 offen offset:72 +; CHECK-NEXT: buffer_load_dword v64, v1, s[0:3], 0 offen offset:68 +; CHECK-NEXT: buffer_load_dword v65, v1, s[0:3], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v66, v1, s[0:3], 0 offen offset:60 +; CHECK-NEXT: buffer_load_dword v67, v1, s[0:3], 0 offen offset:56 +; CHECK-NEXT: buffer_load_dword v68, v1, s[0:3], 0 offen offset:52 +; CHECK-NEXT: buffer_load_dword v69, v1, s[0:3], 0 offen offset:48 +; CHECK-NEXT: buffer_load_dword v70, v1, s[0:3], 0 offen offset:44 +; CHECK-NEXT: buffer_load_dword v71, v1, s[0:3], 0 offen offset:40 +; CHECK-NEXT: buffer_load_dword v80, v1, s[0:3], 0 offen offset:36 +; CHECK-NEXT: buffer_load_dword v81, v1, s[0:3], 0 offen offset:32 +; CHECK-NEXT: buffer_load_dword v82, v1, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_load_dword v83, v1, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_load_dword v84, v1, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_load_dword v85, v1, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v86, v1, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_load_dword v87, v1, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v96, v1, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v97, 
v1, s[0:3], 0 offen +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 +; CHECK-NEXT: v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; CHECK-NEXT: s_waitcnt vmcnt(62) +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:248 +; CHECK-NEXT: s_waitcnt vmcnt(61) +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:244 +; CHECK-NEXT: s_waitcnt vmcnt(60) +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(59) +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:236 +; CHECK-NEXT: s_waitcnt vmcnt(58) +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:232 +; CHECK-NEXT: s_waitcnt vmcnt(57) +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:228 +; CHECK-NEXT: s_waitcnt vmcnt(56) +; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(55) +; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:220 +; CHECK-NEXT: s_waitcnt vmcnt(54) +; CHECK-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:216 +; CHECK-NEXT: s_waitcnt vmcnt(53) +; CHECK-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:212 +; CHECK-NEXT: s_waitcnt vmcnt(52) +; CHECK-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(51) +; CHECK-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:204 +; CHECK-NEXT: s_waitcnt vmcnt(50) +; CHECK-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:200 +; CHECK-NEXT: s_waitcnt vmcnt(49) +; CHECK-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:196 +; CHECK-NEXT: s_waitcnt vmcnt(48) +; CHECK-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(47) +; CHECK-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:188 +; CHECK-NEXT: s_waitcnt vmcnt(46) +; CHECK-NEXT: 
buffer_store_dword v19, v0, s[0:3], 0 offen offset:184 +; CHECK-NEXT: s_waitcnt vmcnt(45) +; CHECK-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:180 +; CHECK-NEXT: s_waitcnt vmcnt(44) +; CHECK-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(43) +; CHECK-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:172 +; CHECK-NEXT: s_waitcnt vmcnt(42) +; CHECK-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:168 +; CHECK-NEXT: s_waitcnt vmcnt(41) +; CHECK-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:164 +; CHECK-NEXT: s_waitcnt vmcnt(40) +; CHECK-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(39) +; CHECK-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:156 +; CHECK-NEXT: s_waitcnt vmcnt(38) +; CHECK-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:152 +; CHECK-NEXT: s_waitcnt vmcnt(37) +; CHECK-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:148 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:140 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:136 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:132 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen offset:116 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: 
buffer_store_dword v38, v0, s[0:3], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen offset:104 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen offset:100 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen offset:88 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen offset:84 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen offset:76 +; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: buffer_store_dword v64, v0, s[0:3], 0 offen offset:68 +; CHECK-NEXT: s_waitcnt vmcnt(16) +; CHECK-NEXT: buffer_store_dword v65, v0, s[0:3], 0 offen offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: buffer_store_dword v66, v0, s[0:3], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: buffer_store_dword v67, v0, s[0:3], 0 offen offset:56 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: buffer_store_dword v68, v0, s[0:3], 0 offen offset:52 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: buffer_store_dword v69, v0, s[0:3], 0 offen offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: buffer_store_dword v70, v0, s[0:3], 0 offen offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: buffer_store_dword v71, v0, s[0:3], 0 offen offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: buffer_store_dword v80, v0, s[0:3], 0 offen offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: buffer_store_dword 
v81, v0, s[0:3], 0 offen offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: buffer_store_dword v82, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: buffer_store_dword v83, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: buffer_store_dword v84, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: buffer_store_dword v85, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: buffer_store_dword v86, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: buffer_store_dword v87, v0, s[0:3], 0 offen offset:8 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: buffer_store_dword v96, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: buffer_store_dword v97, v0, s[0:3], 0 offen +; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x100, v0 +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; CHECK-NEXT: s_cbranch_vccnz .LBB3_1 +; CHECK-NEXT: ; %bb.2: ; %memcpy-split +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memcpy_p5_p5_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v56, off, 
s[0:3], s32 offset:156 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v72, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v73, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v74, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v75, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v79, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v89, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v92, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v93, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v94, off, 
s[0:3], s32 offset:68 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v105, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v107, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v108, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v109, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v111, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v120, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v121, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v122, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: .LBB3_1: ; %load-store-loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: s_clause 0x34 +; ALIGNED-NEXT: buffer_load_ubyte v116, v1, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: buffer_load_ubyte v117, v1, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: buffer_load_ubyte v118, v1, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: buffer_load_ubyte v119, v1, s[0:3], 0 offen offset:252 +; 
ALIGNED-NEXT: buffer_load_ubyte v40, v1, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: buffer_load_ubyte v41, v1, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: buffer_load_ubyte v42, v1, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: buffer_load_ubyte v43, v1, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: buffer_load_ubyte v44, v1, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: buffer_load_ubyte v45, v1, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: buffer_load_ubyte v46, v1, s[0:3], 0 offen offset:245 +; ALIGNED-NEXT: buffer_load_ubyte v47, v1, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: buffer_load_ubyte v56, v1, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: buffer_load_ubyte v57, v1, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: buffer_load_ubyte v58, v1, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: buffer_load_ubyte v59, v1, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: buffer_load_ubyte v60, v1, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: buffer_load_ubyte v61, v1, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: buffer_load_ubyte v62, v1, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: buffer_load_ubyte v63, v1, s[0:3], 0 offen offset:236 +; ALIGNED-NEXT: buffer_load_ubyte v72, v1, s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: buffer_load_ubyte v73, v1, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: buffer_load_ubyte v74, v1, s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: buffer_load_ubyte v75, v1, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: buffer_load_ubyte v76, v1, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: buffer_load_ubyte v77, v1, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: buffer_load_ubyte v78, v1, s[0:3], 0 offen offset:229 +; ALIGNED-NEXT: buffer_load_ubyte v79, v1, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: buffer_load_ubyte v88, v1, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: buffer_load_ubyte v89, v1, s[0:3], 0 offen offset:226 +; ALIGNED-NEXT: buffer_load_ubyte v90, v1, s[0:3], 0 offen offset:225 +; ALIGNED-NEXT: buffer_load_ubyte v91, v1, s[0:3], 0 offen offset:224 +; 
ALIGNED-NEXT: buffer_load_ubyte v92, v1, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: buffer_load_ubyte v93, v1, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: buffer_load_ubyte v94, v1, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: buffer_load_ubyte v95, v1, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: buffer_load_ubyte v104, v1, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: buffer_load_ubyte v105, v1, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: buffer_load_ubyte v106, v1, s[0:3], 0 offen offset:217 +; ALIGNED-NEXT: buffer_load_ubyte v107, v1, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: buffer_load_ubyte v108, v1, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: buffer_load_ubyte v109, v1, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: buffer_load_ubyte v110, v1, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: buffer_load_ubyte v111, v1, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: buffer_load_ubyte v120, v1, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: buffer_load_ubyte v121, v1, s[0:3], 0 offen offset:210 +; ALIGNED-NEXT: buffer_load_ubyte v122, v1, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: buffer_load_ubyte v123, v1, s[0:3], 0 offen offset:208 +; ALIGNED-NEXT: buffer_load_ubyte v124, v1, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: buffer_load_ubyte v125, v1, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: buffer_load_ubyte v126, v1, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: buffer_load_ubyte v127, v1, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; ALIGNED-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x3e +; ALIGNED-NEXT: buffer_load_ubyte v3, v1, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: buffer_load_ubyte v4, v1, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: 
buffer_load_ubyte v5, v1, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: buffer_load_ubyte v6, v1, s[0:3], 0 offen offset:199 +; ALIGNED-NEXT: buffer_load_ubyte v7, v1, s[0:3], 0 offen offset:198 +; ALIGNED-NEXT: buffer_load_ubyte v8, v1, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_load_ubyte v9, v1, s[0:3], 0 offen offset:196 +; ALIGNED-NEXT: buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_load_ubyte v11, v1, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_load_ubyte v12, v1, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_load_ubyte v13, v1, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_load_ubyte v14, v1, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_load_ubyte v15, v1, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_load_ubyte v16, v1, s[0:3], 0 offen offset:189 +; ALIGNED-NEXT: buffer_load_ubyte v17, v1, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_load_ubyte v18, v1, s[0:3], 0 offen offset:187 +; ALIGNED-NEXT: buffer_load_ubyte v19, v1, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: buffer_load_ubyte v20, v1, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: buffer_load_ubyte v21, v1, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_load_ubyte v22, v1, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: buffer_load_ubyte v23, v1, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_load_ubyte v24, v1, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_load_ubyte v25, v1, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_load_ubyte v26, v1, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_load_ubyte v27, v1, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_load_ubyte v28, v1, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_load_ubyte v29, v1, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_load_ubyte v30, v1, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_load_ubyte v31, v1, s[0:3], 0 offen offset:174 +; ALIGNED-NEXT: buffer_load_ubyte v32, v1, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: 
buffer_load_ubyte v33, v1, s[0:3], 0 offen offset:172 +; ALIGNED-NEXT: buffer_load_ubyte v34, v1, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: buffer_load_ubyte v35, v1, s[0:3], 0 offen offset:170 +; ALIGNED-NEXT: buffer_load_ubyte v36, v1, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_load_ubyte v37, v1, s[0:3], 0 offen offset:168 +; ALIGNED-NEXT: buffer_load_ubyte v38, v1, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: buffer_load_ubyte v39, v1, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_load_ubyte v48, v1, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_load_ubyte v49, v1, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_load_ubyte v50, v1, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_load_ubyte v51, v1, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_load_ubyte v52, v1, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_load_ubyte v53, v1, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_load_ubyte v54, v1, s[0:3], 0 offen offset:159 +; ALIGNED-NEXT: buffer_load_ubyte v55, v1, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_load_ubyte v64, v1, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_load_ubyte v65, v1, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_load_ubyte v66, v1, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: buffer_load_ubyte v67, v1, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: buffer_load_ubyte v68, v1, s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_load_ubyte v69, v1, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_load_ubyte v70, v1, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: buffer_load_ubyte v71, v1, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_load_ubyte v80, v1, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: buffer_load_ubyte v81, v1, s[0:3], 0 offen offset:148 +; ALIGNED-NEXT: buffer_load_ubyte v82, v1, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_load_ubyte v83, v1, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: buffer_load_ubyte v84, v1, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: 
buffer_load_ubyte v85, v1, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: buffer_load_ubyte v86, v1, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: buffer_load_ubyte v87, v1, s[0:3], 0 offen offset:142 +; ALIGNED-NEXT: buffer_load_ubyte v96, v1, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_load_ubyte v97, v1, s[0:3], 0 offen offset:140 +; ALIGNED-NEXT: s_clause 0xa +; ALIGNED-NEXT: buffer_load_ubyte v98, v1, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_load_ubyte v99, v1, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_load_ubyte v100, v1, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: buffer_load_ubyte v101, v1, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: buffer_load_ubyte v102, v1, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_load_ubyte v103, v1, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_load_ubyte v112, v1, s[0:3], 0 offen offset:133 +; ALIGNED-NEXT: buffer_load_ubyte v113, v1, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_load_ubyte v114, v1, s[0:3], 0 offen offset:131 +; ALIGNED-NEXT: buffer_load_ubyte v115, v1, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:125 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:116 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 
offset:652 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, 
s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:89 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: 
s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:77 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:67 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:65 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 
offset:448 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:54 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:44 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen 
offset:43 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:42 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:26 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], 
s32 offset:284 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:13 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen 
offset:2 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen +; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_byte v116, v0, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: buffer_store_byte v117, v0, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: buffer_store_byte v118, v0, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: buffer_store_byte v119, v0, s[0:3], 0 offen offset:252 +; ALIGNED-NEXT: buffer_store_byte v40, v0, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: buffer_store_byte v41, v0, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: buffer_store_byte v42, v0, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: buffer_store_byte v43, v0, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: buffer_store_byte v44, v0, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: buffer_store_byte v45, v0, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: buffer_store_byte v46, v0, s[0:3], 0 offen offset:245 +; ALIGNED-NEXT: buffer_store_byte v47, v0, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: buffer_store_byte v56, v0, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: buffer_store_byte v57, v0, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: buffer_store_byte v58, v0, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: buffer_store_byte v59, v0, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: buffer_store_byte v60, v0, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: buffer_store_byte v61, v0, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: buffer_store_byte v62, v0, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: buffer_store_byte v63, v0, s[0:3], 0 offen offset:236 +; 
ALIGNED-NEXT: buffer_store_byte v72, v0, s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: buffer_store_byte v73, v0, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: buffer_store_byte v74, v0, s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: buffer_store_byte v75, v0, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: buffer_store_byte v76, v0, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: buffer_store_byte v77, v0, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: buffer_store_byte v78, v0, s[0:3], 0 offen offset:229 +; ALIGNED-NEXT: buffer_store_byte v79, v0, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: buffer_store_byte v88, v0, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: buffer_store_byte v89, v0, s[0:3], 0 offen offset:226 +; ALIGNED-NEXT: buffer_store_byte v90, v0, s[0:3], 0 offen offset:225 +; ALIGNED-NEXT: buffer_store_byte v91, v0, s[0:3], 0 offen offset:224 +; ALIGNED-NEXT: buffer_store_byte v92, v0, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: buffer_store_byte v93, v0, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: buffer_store_byte v94, v0, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: buffer_store_byte v95, v0, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: buffer_store_byte v104, v0, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: buffer_store_byte v105, v0, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: buffer_store_byte v106, v0, s[0:3], 0 offen offset:217 +; ALIGNED-NEXT: buffer_store_byte v107, v0, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: buffer_store_byte v108, v0, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: buffer_store_byte v109, v0, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: buffer_store_byte v110, v0, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: buffer_store_byte v111, v0, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: buffer_store_byte v120, v0, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: buffer_store_byte v121, v0, s[0:3], 0 offen offset:210 +; ALIGNED-NEXT: buffer_store_byte v122, v0, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: buffer_store_byte v123, v0, s[0:3], 0 offen offset:208 +; 
ALIGNED-NEXT: buffer_store_byte v124, v0, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: buffer_store_byte v125, v0, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: buffer_store_byte v126, v0, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: buffer_store_byte v127, v0, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: buffer_store_byte v4, v0, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: buffer_store_byte v5, v0, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: buffer_store_byte v6, v0, s[0:3], 0 offen offset:199 +; ALIGNED-NEXT: buffer_store_byte v7, v0, s[0:3], 0 offen offset:198 +; ALIGNED-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:196 +; ALIGNED-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_store_byte v11, v0, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_store_byte v12, v0, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_store_byte v13, v0, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_store_byte v14, v0, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_store_byte v15, v0, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_store_byte v16, v0, s[0:3], 0 offen offset:189 +; ALIGNED-NEXT: buffer_store_byte v17, v0, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_store_byte v18, v0, s[0:3], 0 offen offset:187 +; ALIGNED-NEXT: buffer_store_byte v19, v0, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: buffer_store_byte v20, v0, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: buffer_store_byte v21, v0, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_store_byte v22, v0, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: buffer_store_byte v23, v0, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_store_byte 
v24, v0, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_store_byte v25, v0, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_store_byte v26, v0, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_store_byte v27, v0, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_store_byte v28, v0, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_store_byte v29, v0, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_store_byte v30, v0, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_store_byte v31, v0, s[0:3], 0 offen offset:174 +; ALIGNED-NEXT: buffer_store_byte v32, v0, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: buffer_store_byte v33, v0, s[0:3], 0 offen offset:172 +; ALIGNED-NEXT: buffer_store_byte v34, v0, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: buffer_store_byte v35, v0, s[0:3], 0 offen offset:170 +; ALIGNED-NEXT: buffer_store_byte v36, v0, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_store_byte v37, v0, s[0:3], 0 offen offset:168 +; ALIGNED-NEXT: buffer_store_byte v38, v0, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: buffer_store_byte v39, v0, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_store_byte v48, v0, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_store_byte v49, v0, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_store_byte v50, v0, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_store_byte v51, v0, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_store_byte v52, v0, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_store_byte v53, v0, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_store_byte v54, v0, s[0:3], 0 offen offset:159 +; ALIGNED-NEXT: buffer_store_byte v55, v0, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_store_byte v64, v0, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_store_byte v65, v0, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_store_byte v66, v0, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: buffer_store_byte v67, v0, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: buffer_store_byte v68, v0, 
s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_store_byte v69, v0, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_store_byte v70, v0, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: buffer_store_byte v71, v0, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_store_byte v80, v0, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: buffer_store_byte v81, v0, s[0:3], 0 offen offset:148 +; ALIGNED-NEXT: buffer_store_byte v82, v0, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_store_byte v83, v0, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: buffer_store_byte v84, v0, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: buffer_store_byte v85, v0, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: buffer_store_byte v86, v0, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: buffer_store_byte v87, v0, s[0:3], 0 offen offset:142 +; ALIGNED-NEXT: buffer_store_byte v96, v0, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_store_byte v97, v0, s[0:3], 0 offen offset:140 +; ALIGNED-NEXT: buffer_store_byte v98, v0, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_store_byte v99, v0, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_store_byte v100, v0, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: buffer_store_byte v101, v0, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: buffer_store_byte v102, v0, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_store_byte v103, v0, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_store_byte v112, v0, s[0:3], 0 offen offset:133 +; ALIGNED-NEXT: buffer_store_byte v113, v0, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_store_byte v114, v0, s[0:3], 0 offen offset:131 +; ALIGNED-NEXT: buffer_store_byte v115, v0, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:125 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, 
s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:116 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:108 +; 
ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: buffer_load_dword v2, off, 
s[0:3], s32 offset:580 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:89 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload +; 
ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_byte v2, v0, s[0:3], 0 offen offset:77 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen 
offset:67 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:65 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: buffer_load_dword v2, 
off, s[0:3], s32 offset:416 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:54 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded 
Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:44 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:43 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:42 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 
offen offset:26 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:24 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: 
buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:13 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:212 ; 
4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:2 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen +; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x100, v0 +; ALIGNED-NEXT: s_cbranch_vccnz .LBB3_1 +; ALIGNED-NEXT: ; %bb.2: ; %memcpy-split +; ALIGNED-NEXT: s_clause 0x2f +; ALIGNED-NEXT: buffer_load_dword v127, off, s[0:3], s32 +; ALIGNED-NEXT: buffer_load_dword v126, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_load_dword v125, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_load_dword v124, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_load_dword v123, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: buffer_load_dword v122, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_load_dword v121, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_load_dword v120, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: buffer_load_dword v111, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: buffer_load_dword v110, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_load_dword v109, off, s[0:3], s32 offset:40 +; 
ALIGNED-NEXT: buffer_load_dword v108, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_load_dword v107, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: buffer_load_dword v106, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_load_dword v105, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_load_dword v104, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_load_dword v95, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: buffer_load_dword v94, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_load_dword v93, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_load_dword v92, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_load_dword v91, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: buffer_load_dword v90, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_load_dword v89, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_load_dword v88, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_load_dword v79, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: buffer_load_dword v78, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_load_dword v77, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_load_dword v76, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_load_dword v75, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: buffer_load_dword v74, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_load_dword v73, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_load_dword v72, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_load_dword v47, 
off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memcpy_p5_p5_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: v_mov_b32_e32 v2, v1 +; UNROLL3-NEXT: v_mov_b32_e32 v3, v0 +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: .LBB3_1: ; %load-store-loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: s_clause 0xb +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:28 +; UNROLL3-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 +; UNROLL3-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: buffer_load_dword v11, v2, s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: buffer_load_dword v12, v2, s[0:3], 0 offen offset:12 +; UNROLL3-NEXT: buffer_load_dword v13, v2, s[0:3], 0 offen offset:8 +; UNROLL3-NEXT: buffer_load_dword v14, v2, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: buffer_load_dword v15, v2, s[0:3], 0 offen +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 48, v2 +; UNROLL3-NEXT: v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5] +; UNROLL3-NEXT: s_waitcnt vmcnt(11) +; UNROLL3-NEXT: 
buffer_store_dword v4, v3, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: s_waitcnt vmcnt(10) +; UNROLL3-NEXT: buffer_store_dword v5, v3, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: s_waitcnt vmcnt(9) +; UNROLL3-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: s_waitcnt vmcnt(8) +; UNROLL3-NEXT: buffer_store_dword v7, v3, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: s_waitcnt vmcnt(7) +; UNROLL3-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen offset:28 +; UNROLL3-NEXT: s_waitcnt vmcnt(6) +; UNROLL3-NEXT: buffer_store_dword v9, v3, s[0:3], 0 offen offset:24 +; UNROLL3-NEXT: s_waitcnt vmcnt(5) +; UNROLL3-NEXT: buffer_store_dword v10, v3, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: s_waitcnt vmcnt(4) +; UNROLL3-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v12, v3, s[0:3], 0 offen offset:12 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v13, v3, s[0:3], 0 offen offset:8 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v14, v3, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v15, v3, s[0:3], 0 offen +; UNROLL3-NEXT: v_add_nc_u32_e32 v3, 48, v3 +; UNROLL3-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; UNROLL3-NEXT: s_cbranch_vccnz .LBB3_1 +; UNROLL3-NEXT: ; %bb.2: ; %memcpy-split +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2020 +; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v4, v0, 
s[0:3], 0 offen offset:2020 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:2044 +; UNROLL3-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:2044 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + +define void @memcpy_p0_p5_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { +; CHECK-LABEL: memcpy_p0_p5_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: .LBB4_1: ; %load-store-loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_clause 0x3e +; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:32 +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:36 +; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:40 +; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:44 +; CHECK-NEXT: buffer_load_dword v11, v2, 
s[0:3], 0 offen offset:48 +; CHECK-NEXT: buffer_load_dword v12, v2, s[0:3], 0 offen offset:52 +; CHECK-NEXT: buffer_load_dword v13, v2, s[0:3], 0 offen offset:56 +; CHECK-NEXT: buffer_load_dword v14, v2, s[0:3], 0 offen offset:60 +; CHECK-NEXT: buffer_load_dword v18, v2, s[0:3], 0 offen offset:124 +; CHECK-NEXT: buffer_load_dword v17, v2, s[0:3], 0 offen offset:120 +; CHECK-NEXT: buffer_load_dword v16, v2, s[0:3], 0 offen offset:116 +; CHECK-NEXT: buffer_load_dword v15, v2, s[0:3], 0 offen offset:112 +; CHECK-NEXT: buffer_load_dword v22, v2, s[0:3], 0 offen offset:108 +; CHECK-NEXT: buffer_load_dword v21, v2, s[0:3], 0 offen offset:104 +; CHECK-NEXT: buffer_load_dword v20, v2, s[0:3], 0 offen offset:100 +; CHECK-NEXT: buffer_load_dword v19, v2, s[0:3], 0 offen offset:96 +; CHECK-NEXT: buffer_load_dword v26, v2, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_load_dword v25, v2, s[0:3], 0 offen offset:248 +; CHECK-NEXT: buffer_load_dword v24, v2, s[0:3], 0 offen offset:244 +; CHECK-NEXT: buffer_load_dword v23, v2, s[0:3], 0 offen offset:240 +; CHECK-NEXT: buffer_load_dword v30, v2, s[0:3], 0 offen offset:236 +; CHECK-NEXT: buffer_load_dword v29, v2, s[0:3], 0 offen offset:232 +; CHECK-NEXT: buffer_load_dword v28, v2, s[0:3], 0 offen offset:228 +; CHECK-NEXT: buffer_load_dword v27, v2, s[0:3], 0 offen offset:224 +; CHECK-NEXT: buffer_load_dword v34, v2, s[0:3], 0 offen offset:220 +; CHECK-NEXT: buffer_load_dword v33, v2, s[0:3], 0 offen offset:216 +; CHECK-NEXT: buffer_load_dword v32, v2, s[0:3], 0 offen offset:212 +; CHECK-NEXT: buffer_load_dword v31, v2, s[0:3], 0 offen offset:208 +; CHECK-NEXT: buffer_load_dword v38, v2, s[0:3], 0 offen offset:204 +; CHECK-NEXT: buffer_load_dword v37, v2, s[0:3], 0 offen offset:200 +; CHECK-NEXT: buffer_load_dword v36, v2, s[0:3], 0 offen offset:196 +; CHECK-NEXT: buffer_load_dword v35, v2, s[0:3], 0 offen offset:192 +; CHECK-NEXT: buffer_load_dword v51, v2, s[0:3], 0 offen offset:188 +; CHECK-NEXT: buffer_load_dword v50, v2, 
s[0:3], 0 offen offset:184 +; CHECK-NEXT: buffer_load_dword v49, v2, s[0:3], 0 offen offset:180 +; CHECK-NEXT: buffer_load_dword v48, v2, s[0:3], 0 offen offset:176 +; CHECK-NEXT: buffer_load_dword v55, v2, s[0:3], 0 offen offset:172 +; CHECK-NEXT: buffer_load_dword v54, v2, s[0:3], 0 offen offset:168 +; CHECK-NEXT: buffer_load_dword v53, v2, s[0:3], 0 offen offset:164 +; CHECK-NEXT: buffer_load_dword v52, v2, s[0:3], 0 offen offset:160 +; CHECK-NEXT: buffer_load_dword v67, v2, s[0:3], 0 offen offset:156 +; CHECK-NEXT: buffer_load_dword v66, v2, s[0:3], 0 offen offset:152 +; CHECK-NEXT: buffer_load_dword v65, v2, s[0:3], 0 offen offset:148 +; CHECK-NEXT: buffer_load_dword v64, v2, s[0:3], 0 offen offset:144 +; CHECK-NEXT: buffer_load_dword v71, v2, s[0:3], 0 offen offset:140 +; CHECK-NEXT: buffer_load_dword v70, v2, s[0:3], 0 offen offset:136 +; CHECK-NEXT: buffer_load_dword v69, v2, s[0:3], 0 offen offset:132 +; CHECK-NEXT: buffer_load_dword v68, v2, s[0:3], 0 offen offset:128 +; CHECK-NEXT: buffer_load_dword v83, v2, s[0:3], 0 offen offset:92 +; CHECK-NEXT: buffer_load_dword v82, v2, s[0:3], 0 offen offset:88 +; CHECK-NEXT: buffer_load_dword v81, v2, s[0:3], 0 offen offset:84 +; CHECK-NEXT: buffer_load_dword v80, v2, s[0:3], 0 offen offset:80 +; CHECK-NEXT: buffer_load_dword v87, v2, s[0:3], 0 offen offset:76 +; CHECK-NEXT: buffer_load_dword v86, v2, s[0:3], 0 offen offset:72 +; CHECK-NEXT: buffer_load_dword v85, v2, s[0:3], 0 offen offset:68 +; CHECK-NEXT: buffer_load_dword v84, v2, s[0:3], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v96, v2, s[0:3], 0 offen +; CHECK-NEXT: buffer_load_dword v97, v2, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v98, v2, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v99, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, 
vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: v_add_nc_u32_e32 v2, 0x100, v2 +; CHECK-NEXT: v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; CHECK-NEXT: s_waitcnt vmcnt(41) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[23:26] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(37) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[27:30] offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[31:34] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[35:38] offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[64:67] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:128 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[15:18] offset:112 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[19:22] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] offset:64 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[11:14] offset:48 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[7:10] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[3:6] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; CHECK-NEXT: s_cbranch_vccnz .LBB4_1 +; CHECK-NEXT: ; %bb.2: ; %memcpy-split +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memcpy_p0_p5_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v72, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v73, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v74, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v75, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v79, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v89, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v92, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v93, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v105, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v107, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v108, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v109, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v111, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v120, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v121, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v122, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1224 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Spill +; ALIGNED-NEXT: .LBB4_1: ; %load-store-loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: s_clause 0x39 +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:24 +; ALIGNED-NEXT: buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: buffer_load_ubyte v12, v2, s[0:3], 0 offen offset:26 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: buffer_load_ubyte v14, v2, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: buffer_load_ubyte v15, v2, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: buffer_load_ubyte v17, v2, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: buffer_load_ubyte v19, v2, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: buffer_load_ubyte v13, v2, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: buffer_load_ubyte v16, v2, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: buffer_load_ubyte v18, v2, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: 
buffer_load_ubyte v20, v2, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: buffer_load_ubyte v22, v2, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: buffer_load_ubyte v23, v2, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: buffer_load_ubyte v25, v2, s[0:3], 0 offen offset:42 +; ALIGNED-NEXT: buffer_load_ubyte v28, v2, s[0:3], 0 offen offset:43 +; ALIGNED-NEXT: buffer_load_ubyte v21, v2, s[0:3], 0 offen offset:44 +; ALIGNED-NEXT: buffer_load_ubyte v24, v2, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: buffer_load_ubyte v26, v2, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: buffer_load_ubyte v27, v2, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: buffer_load_ubyte v30, v2, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: buffer_load_ubyte v31, v2, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: buffer_load_ubyte v33, v2, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: buffer_load_ubyte v34, v2, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: buffer_load_ubyte v32, v2, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: buffer_load_ubyte v37, v2, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: buffer_load_ubyte v35, v2, s[0:3], 0 offen offset:54 +; ALIGNED-NEXT: buffer_load_ubyte v36, v2, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: buffer_load_ubyte v48, v2, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: buffer_load_ubyte v51, v2, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: buffer_load_ubyte v52, v2, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: buffer_load_ubyte v38, v2, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: buffer_load_ubyte v50, v2, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: buffer_load_ubyte v39, v2, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: buffer_load_ubyte v49, v2, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: buffer_load_ubyte v29, v2, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: buffer_load_ubyte v55, v2, s[0:3], 0 offen offset:65 +; ALIGNED-NEXT: buffer_load_ubyte v66, v2, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: buffer_load_ubyte v53, v2, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: buffer_load_ubyte v67, v2, s[0:3], 0 
offen offset:67 +; ALIGNED-NEXT: buffer_load_ubyte v54, v2, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: buffer_load_ubyte v64, v2, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: buffer_load_ubyte v65, v2, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: buffer_load_ubyte v68, v2, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: buffer_load_ubyte v69, v2, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: buffer_load_ubyte v70, v2, s[0:3], 0 offen offset:77 +; ALIGNED-NEXT: buffer_load_ubyte v71, v2, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: buffer_load_ubyte v80, v2, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: buffer_load_ubyte v127, v2, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: buffer_load_ubyte v81, v2, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: s_waitcnt vmcnt(57) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(56) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(55) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(54) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(53) +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(52) +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(51) +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(50) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(49) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(48) +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: s_waitcnt vmcnt(45) +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(44) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(43) +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v3, v9, 8, v5 +; ALIGNED-NEXT: s_waitcnt vmcnt(41) +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v4, v8, 8, v6 +; ALIGNED-NEXT: v_lshl_or_b32 v5, v10, 8, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v6, v11, 8, v12 +; ALIGNED-NEXT: v_lshl_or_b32 v7, v15, 8, v14 +; ALIGNED-NEXT: v_lshl_or_b32 v8, v19, 8, v17 +; ALIGNED-NEXT: s_waitcnt vmcnt(40) +; ALIGNED-NEXT: v_lshl_or_b32 v9, v16, 8, v13 +; ALIGNED-NEXT: s_waitcnt vmcnt(38) +; ALIGNED-NEXT: v_lshl_or_b32 v10, v20, 8, v18 +; ALIGNED-NEXT: s_waitcnt vmcnt(36) +; ALIGNED-NEXT: v_lshl_or_b32 v11, v23, 8, v22 +; ALIGNED-NEXT: s_waitcnt vmcnt(34) +; ALIGNED-NEXT: v_lshl_or_b32 v12, v28, 8, v25 +; ALIGNED-NEXT: s_waitcnt vmcnt(32) +; ALIGNED-NEXT: v_lshl_or_b32 v13, v24, 8, v21 +; ALIGNED-NEXT: s_waitcnt vmcnt(30) +; ALIGNED-NEXT: v_lshl_or_b32 v14, v27, 8, v26 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 16, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v6, 16, v5 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v8, 16, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v5, v10, 16, v9 +; ALIGNED-NEXT: v_lshl_or_b32 v6, v12, 16, v11 +; ALIGNED-NEXT: v_lshl_or_b32 v7, v14, 16, v13 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(28) +; ALIGNED-NEXT: v_lshl_or_b32 v15, v31, 8, v30 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
s_waitcnt vmcnt(26) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v34, 8, v33 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(24) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v37, 8, v32 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(22) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v36, 8, v35 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(17) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v50, 8, v38 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(15) +; ALIGNED-NEXT: v_lshl_or_b32 v5, v49, 8, v39 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v6, v51, 8, v48 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(11) +; ALIGNED-NEXT: v_lshl_or_b32 v7, v53, 8, v52 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v0, 16, v15 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 16, v1 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v5, 16, v4 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v7, 16, v6 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:652 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v55, 8, v29 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(11) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v67, 8, v66 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(9) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v64, 8, v54 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v68, 8, v65 +; 
ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v70, 8, v69 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:724 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v80, 8, v71 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:728 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v24, 
off, s[0:3], s32 offset:544 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword 
v67, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v64, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v65, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v70, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v71, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v80, off, s[0:3], s32 offset:716 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(8) +; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 offset:1152 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:736 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:768 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:772 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:764 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:756 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:740 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:732 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:720 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v81, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:744 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:776 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:760 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:752 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:748 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:788 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte 
v0, v2, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:852 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:856 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:796 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:800 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:792 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:784 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:816 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:89 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:808 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:820 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:848 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:812 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:804 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: 
s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:844 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:836 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:840 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:832 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:828 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:860 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: buffer_store_dword 
v0, off, s[0:3], s32 offset:868 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:932 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:936 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:876 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:880 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:872 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:864 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:896 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:888 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:900 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:928 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:892 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:884 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:116 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:924 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:904 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:916 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:920 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:912 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:908 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:940 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:125 +; 
ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:948 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1012 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1016 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:956 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:960 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:952 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:944 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:976 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:133 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:968 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:980 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1008 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:972 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:964 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:131 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1004 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:996 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1000 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:992 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1020 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:142 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1028 ; 4-byte 
Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:140 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1036 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1040 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1024 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1056 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1060 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1052 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1064 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1084 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x5 +; ALIGNED-NEXT: buffer_load_ubyte v125, v2, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: buffer_load_ubyte v126, v2, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:148 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_load_ubyte v123, v2, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v126, 8, v125 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1104 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1112 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1116 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v123, 8, v5 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 8, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v121, v2, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_load_ubyte v109, v2, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_load_ubyte v108, v2, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_load_ubyte v107, v2, s[0:3], 0 offen offset:159 +; ALIGNED-NEXT: buffer_load_ubyte v106, v2, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v109, 8, v121 +; ALIGNED-NEXT: s_waitcnt 
vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v107, 8, v108 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1136 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v105, v2, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_load_ubyte v93, v2, s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_load_ubyte v91, v2, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v93, 8, v105 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v106, 8, v91 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1144 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v89, v2, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_load_ubyte v78, v2, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_load_ubyte v73, v2, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_load_ubyte v74, v2, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_load_ubyte v79, v2, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_load_ubyte v75, v2, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_load_ubyte v76, v2, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_load_ubyte v72, v2, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v78, 8, v89 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v74, 8, v73 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v72, 8, v76 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v75, 8, v79 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1160 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v63, v2, s[0:3], 0 
offen offset:172 +; ALIGNED-NEXT: buffer_load_ubyte v61, v2, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: buffer_load_ubyte v62, v2, s[0:3], 0 offen offset:174 +; ALIGNED-NEXT: buffer_load_ubyte v60, v2, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_load_ubyte v57, v2, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v61, 8, v63 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v60, 8, v62 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1164 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v59, v2, s[0:3], 0 offen offset:168 +; ALIGNED-NEXT: buffer_load_ubyte v56, v2, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_load_ubyte v47, v2, s[0:3], 0 offen offset:170 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v56, 8, v59 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v57, 8, v47 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v46, v2, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_load_ubyte v43, v2, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_load_ubyte v119, v2, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_load_ubyte v40, v2, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_load_ubyte v45, v2, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_load_ubyte v41, v2, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_load_ubyte v42, v2, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_load_ubyte v118, v2, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v43, 8, v46 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v40, 8, v119 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) 
+; ALIGNED-NEXT: v_lshl_or_b32 v1, v118, 8, v42 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v41, 8, v45 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1176 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v117, v2, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_load_ubyte v115, v2, s[0:3], 0 offen offset:189 +; ALIGNED-NEXT: buffer_load_ubyte v116, v2, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_load_ubyte v114, v2, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_load_ubyte v112, v2, s[0:3], 0 offen offset:187 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v115, 8, v117 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v114, 8, v116 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1180 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v113, v2, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_load_ubyte v103, v2, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: buffer_load_ubyte v102, v2, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v103, 8, v113 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v112, 8, v102 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1184 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v100, v2, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_load_ubyte v98, v2, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_load_ubyte v87, v2, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_load_ubyte v86, v2, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_load_ubyte v99, v2, s[0:3], 0 offen offset:196 +; ALIGNED-NEXT: buffer_load_ubyte v97, 
v2, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_load_ubyte v96, v2, s[0:3], 0 offen offset:198 +; ALIGNED-NEXT: buffer_load_ubyte v85, v2, s[0:3], 0 offen offset:199 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v98, 8, v100 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v86, 8, v87 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v85, 8, v96 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1188 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v97, 8, v99 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1192 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v83, v2, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: buffer_load_ubyte v81, v2, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: buffer_load_ubyte v82, v2, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: buffer_load_ubyte v80, v2, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: buffer_load_ubyte v71, v2, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v81, 8, v83 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v80, 8, v82 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1196 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v70, v2, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: buffer_load_ubyte v69, v2, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: buffer_load_ubyte v68, v2, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v69, 8, v70 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v71, 8, v68 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1200 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v67, v2, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: buffer_load_ubyte v54, v2, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: buffer_load_ubyte v65, v2, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: buffer_load_ubyte v52, v2, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: buffer_load_ubyte v55, v2, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v54, 8, v67 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v52, 8, v65 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v66, v2, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: buffer_load_ubyte v53, v2, s[0:3], 0 offen offset:217 +; ALIGNED-NEXT: buffer_load_ubyte v49, v2, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: buffer_load_ubyte v48, v2, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: buffer_load_ubyte v64, v2, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: buffer_load_ubyte v51, v2, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: buffer_load_ubyte v50, v2, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: buffer_load_ubyte v39, v2, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v53, 8, v66 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v48, 8, v49 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v51, 8, v64 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v39, 8, v50 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v38, v2, s[0:3], 0 offen offset:208 +; ALIGNED-NEXT: 
buffer_load_ubyte v36, v2, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: buffer_load_ubyte v37, v2, s[0:3], 0 offen offset:210 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v36, 8, v38 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v55, 8, v37 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1216 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v35, v2, s[0:3], 0 offen offset:224 +; ALIGNED-NEXT: buffer_load_ubyte v33, v2, s[0:3], 0 offen offset:225 +; ALIGNED-NEXT: buffer_load_ubyte v29, v2, s[0:3], 0 offen offset:226 +; ALIGNED-NEXT: buffer_load_ubyte v30, v2, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: buffer_load_ubyte v34, v2, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: buffer_load_ubyte v31, v2, s[0:3], 0 offen offset:229 +; ALIGNED-NEXT: buffer_load_ubyte v32, v2, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: buffer_load_ubyte v28, v2, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v33, 8, v35 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v30, 8, v29 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v31, 8, v34 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v28, 8, v32 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x17 +; ALIGNED-NEXT: buffer_load_ubyte v27, v2, s[0:3], 0 offen offset:236 +; ALIGNED-NEXT: buffer_load_ubyte v25, v2, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: buffer_load_ubyte v26, v2, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: buffer_load_ubyte v24, v2, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: buffer_load_ubyte v23, v2, s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: buffer_load_ubyte v22, v2, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: buffer_load_ubyte v21, v2, 
s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: buffer_load_ubyte v20, v2, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: buffer_load_ubyte v19, v2, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: buffer_load_ubyte v17, v2, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: buffer_load_ubyte v13, v2, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: buffer_load_ubyte v14, v2, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: buffer_load_ubyte v18, v2, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: buffer_load_ubyte v15, v2, s[0:3], 0 offen offset:245 +; ALIGNED-NEXT: buffer_load_ubyte v16, v2, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: buffer_load_ubyte v12, v2, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:252 +; ALIGNED-NEXT: buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: v_lshl_or_b32 v124, v4, 16, v3 +; ALIGNED-NEXT: s_clause 0x5 +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen +; ALIGNED-NEXT: buffer_load_ubyte v94, v2, s[0:3], 0 offen offset:2 +; ALIGNED-NEXT: buffer_load_ubyte v88, v2, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: buffer_load_ubyte v90, v2, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: buffer_load_ubyte v92, v2, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: buffer_load_ubyte v95, v2, s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: s_waitcnt vmcnt(28) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v25, 8, v27 +; ALIGNED-NEXT: s_waitcnt vmcnt(26) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v24, 8, v26 +; ALIGNED-NEXT: s_waitcnt vmcnt(14) +; ALIGNED-NEXT: v_lshl_or_b32 v44, v12, 8, v16 +; ALIGNED-NEXT: s_waitcnt vmcnt(10) 
+; ALIGNED-NEXT: v_lshl_or_b32 v58, v8, 8, v10 +; ALIGNED-NEXT: v_lshl_or_b32 v104, v4, 16, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v21, 8, v22 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v23, 8, v20 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:1088 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:1096 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v92, off, s[0:3], s32 offset:1100 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v77, v4, 16, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v17, 8, v19 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v14, 8, v13 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:1108 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v101, v4, 16, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v15, 8, v18 +; ALIGNED-NEXT: v_lshl_or_b32 v84, v44, 16, v4 +; ALIGNED-NEXT: v_lshl_or_b32 v44, v9, 8, v11 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v58, 16, v44 +; ALIGNED-NEXT: v_lshl_or_b32 v44, v5, 8, v6 +; ALIGNED-NEXT: v_lshl_or_b32 v58, v7, 8, v1 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v58, 16, v44 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v44, v2, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: buffer_load_ubyte v58, v2, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:1092 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:1076 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:1080 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v44, v44, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v58, v58, 8, v94 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v58, 16, v44 +; ALIGNED-NEXT: 
v_lshl_or_b32 v44, v90, 8, v88 +; ALIGNED-NEXT: v_lshl_or_b32 v58, v95, 8, v92 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1120 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v58, 16, v44 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v122, v2, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: buffer_load_ubyte v111, v2, s[0:3], 0 offen offset:13 +; ALIGNED-NEXT: buffer_load_ubyte v120, v2, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: buffer_load_ubyte v110, v2, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: buffer_load_ubyte v94, v2, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v44, v111, 8, v122 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v58, v110, 8, v120 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v58, 16, v44 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1140 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v95, v2, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: buffer_load_ubyte v92, v2, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: buffer_load_ubyte v90, v2, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v44, v92, 8, v95 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v58, v94, 8, v90 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v58, 16, v44 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v88, v2, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: buffer_load_ubyte v44, v2, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: buffer_load_ubyte v58, v2, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:228 +; 
ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: v_add_nc_u32_e32 v2, 0x100, v2 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v127, 8, v44 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v127, v58, 8, v88 +; ALIGNED-NEXT: v_lshl_or_b32 v127, v0, 16, v127 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_add_co_u32 v3, vcc_lo, v0, s4 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1224 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s5, v0, vcc_lo +; ALIGNED-NEXT: flat_store_byte v[3:4], v1 offset:250 +; ALIGNED-NEXT: flat_store_byte v[3:4], v7 offset:251 +; ALIGNED-NEXT: flat_store_byte v[3:4], v5 offset:249 +; ALIGNED-NEXT: flat_store_byte v[3:4], v8 offset:255 +; ALIGNED-NEXT: flat_store_byte v[3:4], v9 offset:253 +; ALIGNED-NEXT: flat_store_byte v[3:4], v10 offset:254 +; ALIGNED-NEXT: flat_store_byte v[3:4], v11 offset:252 +; ALIGNED-NEXT: flat_store_byte v[3:4], v6 offset:248 +; ALIGNED-NEXT: flat_store_byte v[3:4], v13 offset:242 +; ALIGNED-NEXT: flat_store_byte v[3:4], v14 offset:243 +; ALIGNED-NEXT: flat_store_byte v[3:4], v17 offset:241 +; ALIGNED-NEXT: flat_store_byte v[3:4], v12 offset:247 +; ALIGNED-NEXT: flat_store_byte v[3:4], v15 offset:245 +; ALIGNED-NEXT: flat_store_byte v[3:4], v16 offset:246 +; ALIGNED-NEXT: flat_store_byte v[3:4], v18 offset:244 +; ALIGNED-NEXT: flat_store_byte v[3:4], v19 offset:240 +; ALIGNED-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: 
v_cmp_gt_u64_e64 s6, 0x800, s[4:5] +; ALIGNED-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: flat_store_byte v[3:4], v20 offset:234 +; ALIGNED-NEXT: flat_store_byte v[3:4], v23 offset:235 +; ALIGNED-NEXT: flat_store_byte v[3:4], v21 offset:233 +; ALIGNED-NEXT: flat_store_byte v[3:4], v24 offset:239 +; ALIGNED-NEXT: flat_store_byte v[3:4], v25 offset:237 +; ALIGNED-NEXT: flat_store_byte v[3:4], v26 offset:238 +; ALIGNED-NEXT: flat_store_byte v[3:4], v27 offset:236 +; ALIGNED-NEXT: flat_store_byte v[3:4], v22 offset:232 +; ALIGNED-NEXT: flat_store_byte v[3:4], v29 offset:226 +; ALIGNED-NEXT: flat_store_byte v[3:4], v30 offset:227 +; ALIGNED-NEXT: flat_store_byte v[3:4], v33 offset:225 +; ALIGNED-NEXT: flat_store_byte v[3:4], v28 offset:231 +; ALIGNED-NEXT: flat_store_byte v[3:4], v31 offset:229 +; ALIGNED-NEXT: flat_store_byte v[3:4], v32 offset:230 +; ALIGNED-NEXT: flat_store_byte v[3:4], v34 offset:228 +; ALIGNED-NEXT: flat_store_byte v[3:4], v35 offset:224 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1216 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: flat_store_byte v[3:4], v54 offset:213 +; ALIGNED-NEXT: flat_store_byte v[3:4], v52 offset:215 +; ALIGNED-NEXT: 
flat_store_byte v[3:4], v36 offset:209 +; ALIGNED-NEXT: flat_store_byte v[3:4], v55 offset:211 +; ALIGNED-NEXT: flat_store_byte v[3:4], v37 offset:210 +; ALIGNED-NEXT: flat_store_byte v[3:4], v65 offset:214 +; ALIGNED-NEXT: flat_store_byte v[3:4], v67 offset:212 +; ALIGNED-NEXT: flat_store_byte v[3:4], v49 offset:218 +; ALIGNED-NEXT: flat_store_byte v[3:4], v48 offset:219 +; ALIGNED-NEXT: flat_store_byte v[3:4], v53 offset:217 +; ALIGNED-NEXT: flat_store_byte v[3:4], v39 offset:223 +; ALIGNED-NEXT: flat_store_byte v[3:4], v51 offset:221 +; ALIGNED-NEXT: flat_store_byte v[3:4], v50 offset:222 +; ALIGNED-NEXT: flat_store_byte v[3:4], v64 offset:220 +; ALIGNED-NEXT: flat_store_byte v[3:4], v66 offset:216 +; ALIGNED-NEXT: flat_store_byte v[3:4], v38 offset:208 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1200 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1196 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1192 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1188 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: flat_store_byte v[3:4], v68 offset:202 +; ALIGNED-NEXT: flat_store_byte v[3:4], v71 offset:203 +; ALIGNED-NEXT: flat_store_byte v[3:4], v69 offset:201 +; ALIGNED-NEXT: flat_store_byte v[3:4], v80 offset:207 +; ALIGNED-NEXT: flat_store_byte v[3:4], v81 offset:205 +; ALIGNED-NEXT: flat_store_byte v[3:4], v82 offset:206 +; ALIGNED-NEXT: flat_store_byte v[3:4], v83 offset:204 +; ALIGNED-NEXT: flat_store_byte v[3:4], v70 offset:200 +; 
ALIGNED-NEXT: flat_store_byte v[3:4], v87 offset:194 +; ALIGNED-NEXT: flat_store_byte v[3:4], v86 offset:195 +; ALIGNED-NEXT: flat_store_byte v[3:4], v98 offset:193 +; ALIGNED-NEXT: flat_store_byte v[3:4], v85 offset:199 +; ALIGNED-NEXT: flat_store_byte v[3:4], v97 offset:197 +; ALIGNED-NEXT: flat_store_byte v[3:4], v96 offset:198 +; ALIGNED-NEXT: flat_store_byte v[3:4], v99 offset:196 +; ALIGNED-NEXT: flat_store_byte v[3:4], v100 offset:192 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1184 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:296 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1180 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:300 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1176 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:292 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:288 +; ALIGNED-NEXT: flat_store_byte v[3:4], v102 offset:186 +; ALIGNED-NEXT: flat_store_byte v[3:4], v112 offset:187 +; ALIGNED-NEXT: flat_store_byte v[3:4], v103 offset:185 +; ALIGNED-NEXT: flat_store_byte v[3:4], v114 offset:191 +; ALIGNED-NEXT: flat_store_byte v[3:4], v115 offset:189 +; ALIGNED-NEXT: flat_store_byte v[3:4], v116 offset:190 +; ALIGNED-NEXT: flat_store_byte v[3:4], v117 offset:188 +; ALIGNED-NEXT: flat_store_byte v[3:4], v113 offset:184 +; ALIGNED-NEXT: flat_store_byte v[3:4], v119 offset:178 +; ALIGNED-NEXT: flat_store_byte v[3:4], v40 offset:179 +; ALIGNED-NEXT: flat_store_byte v[3:4], v43 offset:177 +; ALIGNED-NEXT: flat_store_byte v[3:4], v118 offset:183 +; ALIGNED-NEXT: flat_store_byte v[3:4], v41 offset:181 +; ALIGNED-NEXT: flat_store_byte 
v[3:4], v42 offset:182 +; ALIGNED-NEXT: flat_store_byte v[3:4], v45 offset:180 +; ALIGNED-NEXT: flat_store_byte v[3:4], v46 offset:176 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:312 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1164 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:316 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1160 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:308 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:304 +; ALIGNED-NEXT: flat_store_byte v[3:4], v47 offset:170 +; ALIGNED-NEXT: flat_store_byte v[3:4], v57 offset:171 +; ALIGNED-NEXT: flat_store_byte v[3:4], v56 offset:169 +; ALIGNED-NEXT: flat_store_byte v[3:4], v60 offset:175 +; ALIGNED-NEXT: flat_store_byte v[3:4], v61 offset:173 +; ALIGNED-NEXT: flat_store_byte v[3:4], v62 offset:174 +; ALIGNED-NEXT: flat_store_byte v[3:4], v63 offset:172 +; ALIGNED-NEXT: flat_store_byte v[3:4], v59 offset:168 +; ALIGNED-NEXT: flat_store_byte v[3:4], v73 offset:162 +; ALIGNED-NEXT: flat_store_byte v[3:4], v74 offset:163 +; ALIGNED-NEXT: flat_store_byte v[3:4], v78 offset:161 +; ALIGNED-NEXT: flat_store_byte v[3:4], v72 offset:167 +; ALIGNED-NEXT: flat_store_byte v[3:4], v75 offset:165 +; ALIGNED-NEXT: flat_store_byte v[3:4], v76 offset:166 +; ALIGNED-NEXT: flat_store_byte v[3:4], v79 offset:164 +; ALIGNED-NEXT: flat_store_byte v[3:4], v89 offset:160 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1144 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 
offset:264 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1136 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: flat_store_byte v[3:4], v91 offset:154 +; ALIGNED-NEXT: flat_store_byte v[3:4], v106 offset:155 +; ALIGNED-NEXT: flat_store_byte v[3:4], v93 offset:153 +; ALIGNED-NEXT: flat_store_byte v[3:4], v107 offset:159 +; ALIGNED-NEXT: flat_store_byte v[3:4], v109 offset:157 +; ALIGNED-NEXT: flat_store_byte v[3:4], v108 offset:158 +; ALIGNED-NEXT: flat_store_byte v[3:4], v121 offset:156 +; ALIGNED-NEXT: flat_store_byte v[3:4], v105 offset:152 +; ALIGNED-NEXT: flat_store_byte v[3:4], v125 offset:146 +; ALIGNED-NEXT: flat_store_byte v[3:4], v126 offset:147 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:145 +; ALIGNED-NEXT: flat_store_byte v[3:4], v123 offset:151 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:149 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:150 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:148 +; ALIGNED-NEXT: buffer_load_dword v0, off, 
s[0:3], s32 offset:1072 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:144 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:138 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:139 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:137 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:143 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:141 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 
offset:142 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:140 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:136 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:130 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:996 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:131 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:129 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:135 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:133 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:134 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:132 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:128 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, 
s[0:3], s32 offset:360 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:968 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:364 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:948 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:356 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:940 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:352 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:980 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:122 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:976 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:123 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:972 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:121 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:960 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:127 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:952 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:125 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:956 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:126 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:944 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:124 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:964 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) 
+; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:120 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:924 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:114 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:115 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:912 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:113 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:936 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:119 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:928 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:117 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:932 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:118 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:920 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:116 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:908 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:112 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:904 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:376 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:888 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:380 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:868 ; 4-byte Folded Reload +; ALIGNED-NEXT: 
s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:372 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:860 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:368 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:900 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:106 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:896 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:107 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:892 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:105 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:880 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:111 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:872 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:109 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:876 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:110 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:864 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:108 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:884 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:104 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:844 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:98 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:836 ; 4-byte Folded Reload 
+; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:99 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:832 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:97 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:856 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:103 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:848 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:101 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:852 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:102 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:840 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:100 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:828 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:96 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:328 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:808 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:332 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:788 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:324 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:320 +; ALIGNED-NEXT: buffer_load_dword v0, off, 
s[0:3], s32 offset:820 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:90 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:816 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:91 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:812 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:89 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:800 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:95 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:792 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:93 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:796 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:94 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:784 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:92 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:804 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:88 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:764 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:82 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:756 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:83 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:752 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:81 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 
offset:776 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:87 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:768 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:85 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:772 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:86 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:760 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:84 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:748 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:80 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:744 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:344 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:728 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:348 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:724 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:340 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:336 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:740 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:74 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:736 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:75 +; ALIGNED-NEXT: 
buffer_load_dword v0, off, s[0:3], s32 offset:732 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:73 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:716 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:79 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:77 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:78 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:76 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:720 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:72 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:66 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:67 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:65 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:71 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:69 +; ALIGNED-NEXT: buffer_load_dword 
v0, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:70 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:68 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:64 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:668 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:424 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:428 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:420 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:416 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:61 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:58 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:59 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 
offset:57 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:63 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:62 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:60 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:56 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:53 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:50 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:51 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:49 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:55 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:54 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:52 +; 
ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:48 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:444 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:440 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:436 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:432 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:43 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:42 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:41 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:40 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:47 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
flat_store_byte v[3:4], v0 offset:46 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:45 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:44 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:35 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:34 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:33 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:32 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:39 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:38 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:37 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:36 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword 
v0, off, s[0:3], s32 offset:392 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:396 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:388 +; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 offset:384 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:26 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:27 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:25 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:31 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:29 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:30 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:28 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:24 +; ALIGNED-NEXT: flat_store_byte v[3:4], v44 offset:18 +; 
ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1152 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:19 +; ALIGNED-NEXT: flat_store_byte v[3:4], v58 offset:17 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:23 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:21 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:22 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:20 +; ALIGNED-NEXT: flat_store_byte v[3:4], v88 offset:16 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:408 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1140 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:412 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:404 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:400 +; ALIGNED-NEXT: flat_store_byte v[3:4], v90 offset:10 +; ALIGNED-NEXT: flat_store_byte v[3:4], v94 offset:11 +; ALIGNED-NEXT: flat_store_byte v[3:4], v111 offset:13 +; ALIGNED-NEXT: flat_store_byte v[3:4], v92 
offset:9 +; ALIGNED-NEXT: flat_store_byte v[3:4], v110 offset:15 +; ALIGNED-NEXT: flat_store_byte v[3:4], v120 offset:14 +; ALIGNED-NEXT: flat_store_byte v[3:4], v122 offset:12 +; ALIGNED-NEXT: flat_store_byte v[3:4], v95 offset:8 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:2 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:3 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:1 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:7 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:5 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:6 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:4 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 +; ALIGNED-NEXT: s_cbranch_vccnz .LBB4_1 +; ALIGNED-NEXT: ; %bb.2: ; %memcpy-split +; ALIGNED-NEXT: s_clause 0x2f +; ALIGNED-NEXT: buffer_load_dword v127, off, s[0:3], s32 +; ALIGNED-NEXT: buffer_load_dword v126, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_load_dword v125, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: 
buffer_load_dword v124, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_load_dword v123, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: buffer_load_dword v122, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_load_dword v121, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_load_dword v120, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: buffer_load_dword v111, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: buffer_load_dword v110, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_load_dword v109, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_load_dword v108, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_load_dword v107, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: buffer_load_dword v106, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_load_dword v105, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_load_dword v104, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_load_dword v95, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: buffer_load_dword v94, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_load_dword v93, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_load_dword v92, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_load_dword v91, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: buffer_load_dword v90, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_load_dword v89, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_load_dword v88, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_load_dword v79, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: buffer_load_dword v78, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_load_dword v77, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_load_dword v76, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_load_dword v75, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: buffer_load_dword v74, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_load_dword v73, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_load_dword v72, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_load_dword v63, off, s[0:3], 
s32 offset:128 +; ALIGNED-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memcpy_p0_p5_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: v_mov_b32_e32 v3, v2 +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: s_inst_prefetch 0x1 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB4_1: ; %load-store-loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: s_clause 0xb +; UNROLL3-NEXT: buffer_load_dword v4, v3, s[0:3], 0 offen +; UNROLL3-NEXT: buffer_load_dword v5, v3, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: buffer_load_dword v6, v3, s[0:3], 0 offen offset:8 +; UNROLL3-NEXT: buffer_load_dword v7, v3, s[0:3], 0 offen offset:12 +; UNROLL3-NEXT: buffer_load_dword v8, v3, s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: buffer_load_dword v9, v3, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: buffer_load_dword v10, v3, s[0:3], 0 offen offset:24 +; 
UNROLL3-NEXT: buffer_load_dword v11, v3, s[0:3], 0 offen offset:28 +; UNROLL3-NEXT: buffer_load_dword v12, v3, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: buffer_load_dword v13, v3, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: buffer_load_dword v14, v3, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: buffer_load_dword v15, v3, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: v_add_nc_u32_e32 v3, 48, v3 +; UNROLL3-NEXT: v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5] +; UNROLL3-NEXT: s_waitcnt vmcnt(4) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[8:11] offset:16 +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[4:7] +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[12:15] offset:32 +; UNROLL3-NEXT: s_and_b32 vcc_lo, exec_lo, s6 +; UNROLL3-NEXT: s_cbranch_vccnz .LBB4_1 +; UNROLL3-NEXT: ; %bb.2: ; %memcpy-split +; UNROLL3-NEXT: s_inst_prefetch 0x2 +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:2020 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:2016 +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:2044 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:2032 +; UNROLL3-NEXT: s_waitcnt lgkmcnt(0) +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void 
@llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + + +; memmove for address spaces 0, 1, 4, 5 + +define void @memmove_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { +; CHECK-LABEL: memmove_p0_p0_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s4, exec_lo +; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; CHECK-NEXT: s_xor_b32 s6, exec_lo, s4 +; CHECK-NEXT: s_cbranch_execz .LBB5_3 +; CHECK-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: .LBB5_2: ; %memmove_fwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_clause 0xf +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[96:97] offset:224 +; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[96:97] offset:240 +; CHECK-NEXT: flat_load_dwordx4 v[12:15], v[96:97] offset:192 +; CHECK-NEXT: flat_load_dwordx4 v[16:19], v[96:97] offset:208 +; CHECK-NEXT: flat_load_dwordx4 v[20:23], v[96:97] offset:160 +; CHECK-NEXT: flat_load_dwordx4 v[24:27], v[96:97] offset:176 +; CHECK-NEXT: flat_load_dwordx4 v[28:31], v[96:97] offset:128 +; CHECK-NEXT: flat_load_dwordx4 v[32:35], v[96:97] offset:144 +; CHECK-NEXT: flat_load_dwordx4 v[36:39], v[96:97] offset:96 +; CHECK-NEXT: flat_load_dwordx4 v[48:51], v[96:97] offset:112 +; CHECK-NEXT: flat_load_dwordx4 v[52:55], v[96:97] offset:64 +; CHECK-NEXT: flat_load_dwordx4 v[64:67], v[96:97] offset:80 +; CHECK-NEXT: flat_load_dwordx4 v[68:71], v[96:97] offset:32 +; CHECK-NEXT: flat_load_dwordx4 v[80:83], v[96:97] offset:48 +; CHECK-NEXT: flat_load_dwordx4 v[84:87], v[96:97] +; CHECK-NEXT: flat_load_dwordx4 v[96:99], 
v[96:97] offset:16 +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: s_waitcnt vmcnt(15) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[4:7] offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(14) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[8:11] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(13) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[12:15] offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(12) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[16:19] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(11) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[20:23] offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(10) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[24:27] offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(9) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[28:31] offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(8) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[32:35] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(7) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[36:39] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(6) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(5) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(4) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[64:67] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] offset:16 +; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800 +; CHECK-NEXT: s_cbranch_scc1 .LBB5_2 +; CHECK-NEXT: .LBB5_3: ; %Flow5 +; CHECK-NEXT: 
s_andn2_saveexec_b32 s8, s6 +; CHECK-NEXT: s_cbranch_execz .LBB5_6 +; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader +; CHECK-NEXT: s_movk_i32 s6, 0xff00 +; CHECK-NEXT: s_mov_b64 s[4:5], 0x700 +; CHECK-NEXT: s_mov_b32 s7, -1 +; CHECK-NEXT: .LBB5_5: ; %memmove_bwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_clause 0xf +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[96:97] offset:224 +; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[96:97] offset:240 +; CHECK-NEXT: flat_load_dwordx4 v[12:15], v[96:97] offset:192 +; CHECK-NEXT: flat_load_dwordx4 v[16:19], v[96:97] offset:208 +; CHECK-NEXT: flat_load_dwordx4 v[20:23], v[96:97] offset:160 +; CHECK-NEXT: flat_load_dwordx4 v[24:27], v[96:97] offset:176 +; CHECK-NEXT: flat_load_dwordx4 v[28:31], v[96:97] offset:128 +; CHECK-NEXT: flat_load_dwordx4 v[32:35], v[96:97] offset:144 +; CHECK-NEXT: flat_load_dwordx4 v[36:39], v[96:97] offset:96 +; CHECK-NEXT: flat_load_dwordx4 v[48:51], v[96:97] offset:112 +; CHECK-NEXT: flat_load_dwordx4 v[52:55], v[96:97] offset:64 +; CHECK-NEXT: flat_load_dwordx4 v[64:67], v[96:97] offset:80 +; CHECK-NEXT: flat_load_dwordx4 v[68:71], v[96:97] offset:32 +; CHECK-NEXT: flat_load_dwordx4 v[80:83], v[96:97] offset:48 +; CHECK-NEXT: flat_load_dwordx4 v[84:87], v[96:97] +; CHECK-NEXT: flat_load_dwordx4 v[96:99], v[96:97] offset:16 +; CHECK-NEXT: s_add_u32 s4, s4, 0xffffff00 +; CHECK-NEXT: s_addc_u32 s5, s5, -1 +; CHECK-NEXT: s_waitcnt vmcnt(15) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[4:7] offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(14) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[8:11] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(13) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[12:15] offset:192 +; CHECK-NEXT: 
s_waitcnt vmcnt(12) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[16:19] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(11) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[20:23] offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(10) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[24:27] offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(9) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[28:31] offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(8) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[32:35] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(7) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[36:39] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(6) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(5) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(4) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[64:67] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] offset:16 +; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; CHECK-NEXT: s_cbranch_scc0 .LBB5_5 +; CHECK-NEXT: .LBB5_6: ; %Flow6 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memmove_p0_p0_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b32 s4, exec_lo +; ALIGNED-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; ALIGNED-NEXT: s_xor_b32 s6, exec_lo, s4 +; ALIGNED-NEXT: s_cbranch_execz .LBB5_3 +; ALIGNED-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: .LBB5_2: ; %memmove_fwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v20, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: flat_load_dwordx4 v[16:19], v[20:21] offset:240 +; ALIGNED-NEXT: flat_load_dwordx4 v[22:25], v[20:21] offset:224 +; ALIGNED-NEXT: flat_load_dwordx4 v[4:7], v[20:21] +; ALIGNED-NEXT: flat_load_dwordx4 v[8:11], v[20:21] offset:16 +; ALIGNED-NEXT: flat_load_dwordx4 v[12:15], v[20:21] offset:32 +; ALIGNED-NEXT: flat_load_dwordx4 v[98:101], v[20:21] offset:48 +; ALIGNED-NEXT: flat_load_dwordx4 v[112:115], v[20:21] offset:64 +; ALIGNED-NEXT: flat_load_dwordx4 v[82:85], v[20:21] offset:80 +; ALIGNED-NEXT: flat_load_dwordx4 v[116:119], v[20:21] offset:96 +; ALIGNED-NEXT: flat_load_dwordx4 v[66:69], v[20:21] offset:112 +; ALIGNED-NEXT: flat_load_dwordx4 v[40:43], v[20:21] offset:128 +; ALIGNED-NEXT: flat_load_dwordx4 v[50:53], v[20:21] offset:144 +; ALIGNED-NEXT: flat_load_dwordx4 v[44:47], v[20:21] offset:160 +; ALIGNED-NEXT: flat_load_dwordx4 v[34:37], v[20:21] offset:176 +; ALIGNED-NEXT: flat_load_dwordx4 v[30:33], v[20:21] offset:192 +; ALIGNED-NEXT: 
flat_load_dwordx4 v[26:29], v[20:21] offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) lgkmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v21 offset:254 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:252 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v20 offset:250 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:248 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v19 offset:246 +; ALIGNED-NEXT: flat_store_byte v[16:17], v19 offset:244 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v18 offset:242 +; ALIGNED-NEXT: flat_store_byte v[16:17], v18 offset:240 +; ALIGNED-NEXT: s_waitcnt lgkmcnt(22) +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: 
buffer_load_dword v23, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 8, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 8, v20 +; ALIGNED-NEXT: s_cmp_lg_u64 s[4:5], 0x800 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v25 offset:238 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:236 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v24 offset:234 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:232 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v23 offset:230 +; ALIGNED-NEXT: flat_store_byte v[16:17], v23 offset:228 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v22 offset:226 +; ALIGNED-NEXT: flat_store_byte v[16:17], v22 offset:224 +; ALIGNED-NEXT: s_waitcnt lgkmcnt(16) +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v29 offset:222 +; ALIGNED-NEXT: flat_store_byte v[16:17], v29 offset:220 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v28 offset:218 +; ALIGNED-NEXT: flat_store_byte v[16:17], v28 offset:216 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi 
v[16:17], v27 offset:214 +; ALIGNED-NEXT: flat_store_byte v[16:17], v27 offset:212 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v26 offset:210 +; ALIGNED-NEXT: flat_store_byte v[16:17], v26 offset:208 +; ALIGNED-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v33 offset:206 +; ALIGNED-NEXT: flat_store_byte v[16:17], v33 offset:204 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v32 offset:202 +; ALIGNED-NEXT: flat_store_byte v[16:17], v32 offset:200 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v31 offset:198 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:196 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v30 offset:194 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:192 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_load_dword v34, 
off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v37 offset:190 +; ALIGNED-NEXT: flat_store_byte v[16:17], v37 offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v36 offset:186 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:184 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v35 offset:182 +; ALIGNED-NEXT: flat_store_byte v[16:17], v35 offset:180 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v34 offset:178 +; ALIGNED-NEXT: flat_store_byte v[16:17], v34 offset:176 +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v49 offset:174 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:172 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v48 offset:170 +; ALIGNED-NEXT: flat_store_byte v[16:17], v48 offset:168 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v39 offset:166 +; ALIGNED-NEXT: flat_store_byte v[16:17], v39 offset:164 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v38 offset:162 +; ALIGNED-NEXT: flat_store_byte v[16:17], v38 offset:160 +; ALIGNED-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: buffer_store_dword v51, off, s[0:3], 
s32 offset:100 +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v53 offset:158 +; ALIGNED-NEXT: flat_store_byte v[16:17], v53 offset:156 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v52 offset:154 +; ALIGNED-NEXT: flat_store_byte v[16:17], v52 offset:152 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v51 offset:150 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:148 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v50 offset:146 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:144 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v65 offset:142 +; ALIGNED-NEXT: flat_store_byte v[16:17], v65 offset:140 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v64 offset:138 +; ALIGNED-NEXT: flat_store_byte v[16:17], v64 offset:136 +; 
ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v55 offset:134 +; ALIGNED-NEXT: flat_store_byte v[16:17], v55 offset:132 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v54 offset:130 +; ALIGNED-NEXT: flat_store_byte v[16:17], v54 offset:128 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v69 offset:126 +; ALIGNED-NEXT: flat_store_byte v[16:17], v69 offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v68 offset:122 +; ALIGNED-NEXT: flat_store_byte v[16:17], v68 offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v67 offset:118 +; ALIGNED-NEXT: flat_store_byte v[16:17], v67 offset:116 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v66 offset:114 +; ALIGNED-NEXT: flat_store_byte v[16:17], v66 offset:112 +; ALIGNED-NEXT: buffer_store_dword v116, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: buffer_store_dword v117, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_store_dword v118, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_store_dword v119, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: 
buffer_load_dword v71, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v81 offset:110 +; ALIGNED-NEXT: flat_store_byte v[16:17], v81 offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v80 offset:106 +; ALIGNED-NEXT: flat_store_byte v[16:17], v80 offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v71 offset:102 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v70 offset:98 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:96 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_store_dword v85, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v85 offset:94 +; ALIGNED-NEXT: flat_store_byte v[16:17], v85 offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v84 offset:90 +; ALIGNED-NEXT: flat_store_byte v[16:17], v84 offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v83 offset:86 +; ALIGNED-NEXT: flat_store_byte v[16:17], v83 offset:84 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v82 offset:82 +; ALIGNED-NEXT: flat_store_byte v[16:17], v82 offset:80 +; ALIGNED-NEXT: buffer_store_dword 
v112, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: buffer_store_dword v113, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_store_dword v114, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_store_dword v115, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v97, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: buffer_load_dword v96, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 8, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 8, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 8, v24 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v97 offset:78 +; ALIGNED-NEXT: flat_store_byte v[16:17], v97 offset:76 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v96 offset:74 +; ALIGNED-NEXT: flat_store_byte v[16:17], v96 offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v87 offset:70 +; ALIGNED-NEXT: flat_store_byte v[16:17], v87 offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v86 offset:66 +; ALIGNED-NEXT: flat_store_byte v[16:17], v86 offset:64 +; ALIGNED-NEXT: buffer_store_dword v98, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: buffer_store_dword v99, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_store_dword v100, off, s[0:3], s32 offset:264 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v101, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: buffer_load_dword v100, off, s[0:3], s32 
offset:264 +; ALIGNED-NEXT: buffer_load_dword v99, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_load_dword v98, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v101 offset:62 +; ALIGNED-NEXT: flat_store_byte v[16:17], v101 offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v100 offset:58 +; ALIGNED-NEXT: flat_store_byte v[16:17], v100 offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v99 offset:54 +; ALIGNED-NEXT: flat_store_byte v[16:17], v99 offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v98 offset:50 +; ALIGNED-NEXT: flat_store_byte v[16:17], v98 offset:48 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v15 offset:42 +; ALIGNED-NEXT: flat_store_byte v[16:17], v15 offset:40 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v14 offset:46 +; ALIGNED-NEXT: flat_store_byte v[16:17], v14 offset:44 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v13 offset:34 +; ALIGNED-NEXT: flat_store_byte v[16:17], v13 offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v12 offset:38 +; ALIGNED-NEXT: flat_store_byte v[16:17], v12 offset:36 +; 
ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v11 offset:30 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v10 offset:26 +; ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v9 offset:22 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v8 offset:18 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:16 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v27 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v23 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v23, 8, v23 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:253 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 24, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 8, v22 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:251 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v29 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v29 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 8, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v27, 8, v27 +; ALIGNED-NEXT: flat_store_byte v[16:17], v19 offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 24, v26 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v26, 8, v26 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v51 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:243 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v33 +; ALIGNED-NEXT: flat_store_byte v[16:17], v18 offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 24, v32 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 8, v32 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:239 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v31 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:237 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v30 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 8, v30 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:235 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v37 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v37 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v36 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 8, v36 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v35 +; ALIGNED-NEXT: flat_store_byte v[16:17], v23 offset:229 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v23, 24, v34 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 8, v34 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 24, v49 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v49 +; ALIGNED-NEXT: flat_store_byte v[16:17], v22 offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 8, v48 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v39 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v39 +; ALIGNED-NEXT: flat_store_byte v[16:17], v29 offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 8, v38 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 24, v53 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 8, v53 +; ALIGNED-NEXT: flat_store_byte v[16:17], v28 offset:217 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 8, v52 +; ALIGNED-NEXT: flat_store_byte v[16:17], v27 offset:213 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v27, 24, v50 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 8, v50 +; ALIGNED-NEXT: flat_store_byte v[16:17], v19 offset:211 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 24, v65 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v65 +; ALIGNED-NEXT: flat_store_byte v[16:17], v26 offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v26, 24, v64 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:149 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 8, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 8, v64 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:207 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 8, v55 +; ALIGNED-NEXT: flat_store_byte v[16:17], v33 offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 24, v54 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v54 +; ALIGNED-NEXT: flat_store_byte v[16:17], v18 offset:203 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v18, 24, v69 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v69 +; ALIGNED-NEXT: flat_store_byte v[16:17], v32 offset:201 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 24, v68 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v68 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v67 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v67 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 8, v66 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v81 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 8, v80 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:191 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v71 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v71 +; ALIGNED-NEXT: flat_store_byte v[16:17], v37 offset:189 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 24, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v70 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:187 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v85 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v85 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v84 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 8, v84 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v83 +; ALIGNED-NEXT: flat_store_byte v[16:17], v35 offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 24, v82 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 8, v82 +; ALIGNED-NEXT: flat_store_byte v[16:17], v23 offset:179 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v23, 24, v97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 8, v97 +; ALIGNED-NEXT: flat_store_byte v[16:17], v34 offset:177 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v34, 24, v96 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 8, v96 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 24, v87 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v87 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:173 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 8, v86 +; ALIGNED-NEXT: flat_store_byte v[16:17], v22 offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 24, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v101 +; ALIGNED-NEXT: flat_store_byte v[16:17], v48 offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 24, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v100 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:167 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v99 +; ALIGNED-NEXT: flat_store_byte v[16:17], v39 offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 24, v98 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v98 +; ALIGNED-NEXT: flat_store_byte v[16:17], v29 offset:163 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 24, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v15, 8, v15 +; ALIGNED-NEXT: flat_store_byte v[16:17], v38 offset:161 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 24, v14 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v14, 8, v14 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:159 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v13 +; ALIGNED-NEXT: flat_store_byte v[16:17], v53 offset:157 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v12, 8, v12 +; ALIGNED-NEXT: flat_store_byte v[16:17], v28 offset:155 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 24, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 8, v11 +; ALIGNED-NEXT: flat_store_byte v[16:17], v52 offset:153 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 24, v10 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 8, v10 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:151 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v112, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v9 +; ALIGNED-NEXT: flat_store_byte v[16:17], v27 offset:147 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:145 +; ALIGNED-NEXT: flat_store_byte v[16:17], v19 offset:143 +; ALIGNED-NEXT: flat_store_byte v[16:17], v65 offset:141 +; ALIGNED-NEXT: flat_store_byte v[16:17], v26 offset:139 +; ALIGNED-NEXT: flat_store_byte v[16:17], v64 offset:137 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:135 +; ALIGNED-NEXT: flat_store_byte v[16:17], v55 offset:133 +; ALIGNED-NEXT: flat_store_byte v[16:17], v33 offset:131 +; ALIGNED-NEXT: flat_store_byte v[16:17], v54 offset:129 +; ALIGNED-NEXT: flat_store_byte v[16:17], v18 offset:127 +; ALIGNED-NEXT: flat_store_byte v[16:17], v69 offset:125 +; ALIGNED-NEXT: flat_store_byte v[16:17], v32 offset:123 +; ALIGNED-NEXT: flat_store_byte v[16:17], v68 offset:121 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:119 +; ALIGNED-NEXT: flat_store_byte v[16:17], v67 offset:117 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:115 +; ALIGNED-NEXT: flat_store_byte v[16:17], v66 offset:113 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:111 +; ALIGNED-NEXT: flat_store_byte v[16:17], v81 offset:109 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:107 +; ALIGNED-NEXT: flat_store_byte v[16:17], v80 offset:105 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:103 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:101 +; ALIGNED-NEXT: flat_store_byte v[16:17], v37 offset:99 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:97 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:95 +; ALIGNED-NEXT: flat_store_byte v[16:17], v85 offset:93 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:91 +; ALIGNED-NEXT: flat_store_byte v[16:17], v84 offset:89 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:87 +; ALIGNED-NEXT: flat_store_byte v[16:17], v83 offset:85 +; ALIGNED-NEXT: flat_store_byte v[16:17], v35 offset:83 +; 
ALIGNED-NEXT: flat_store_byte v[16:17], v82 offset:81 +; ALIGNED-NEXT: flat_store_byte v[16:17], v23 offset:79 +; ALIGNED-NEXT: flat_store_byte v[16:17], v97 offset:77 +; ALIGNED-NEXT: flat_store_byte v[16:17], v34 offset:75 +; ALIGNED-NEXT: flat_store_byte v[16:17], v96 offset:73 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:71 +; ALIGNED-NEXT: flat_store_byte v[16:17], v87 offset:69 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:67 +; ALIGNED-NEXT: flat_store_byte v[16:17], v86 offset:65 +; ALIGNED-NEXT: flat_store_byte v[16:17], v22 offset:63 +; ALIGNED-NEXT: flat_store_byte v[16:17], v101 offset:61 +; ALIGNED-NEXT: flat_store_byte v[16:17], v48 offset:59 +; ALIGNED-NEXT: flat_store_byte v[16:17], v100 offset:57 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:55 +; ALIGNED-NEXT: flat_store_byte v[16:17], v99 offset:53 +; ALIGNED-NEXT: flat_store_byte v[16:17], v39 offset:51 +; ALIGNED-NEXT: flat_store_byte v[16:17], v98 offset:49 +; ALIGNED-NEXT: flat_store_byte v[16:17], v29 offset:43 +; ALIGNED-NEXT: flat_store_byte v[16:17], v15 offset:41 +; ALIGNED-NEXT: flat_store_byte v[16:17], v38 offset:47 +; ALIGNED-NEXT: flat_store_byte v[16:17], v14 offset:45 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:35 +; ALIGNED-NEXT: flat_store_byte v[16:17], v13 offset:33 +; ALIGNED-NEXT: flat_store_byte v[16:17], v53 offset:39 +; ALIGNED-NEXT: flat_store_byte v[16:17], v12 offset:37 +; ALIGNED-NEXT: flat_store_byte v[16:17], v28 offset:31 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:29 +; ALIGNED-NEXT: flat_store_byte v[16:17], v52 offset:27 +; ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:25 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:23 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:21 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:19 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v7 offset:14 +; ALIGNED-NEXT: 
flat_store_byte v[16:17], v7 offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v6 offset:10 +; ALIGNED-NEXT: flat_store_byte v[16:17], v6 offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v5 offset:6 +; ALIGNED-NEXT: flat_store_byte v[16:17], v5 offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v4 offset:2 +; ALIGNED-NEXT: flat_store_byte v[16:17], v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 24, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v7, 8, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v6, 8, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 24, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v4, 8, v4 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:15 +; ALIGNED-NEXT: flat_store_byte v[16:17], v7 offset:13 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:11 +; ALIGNED-NEXT: flat_store_byte v[16:17], v6 offset:9 +; ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:7 +; ALIGNED-NEXT: flat_store_byte v[16:17], v5 offset:5 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:3 +; ALIGNED-NEXT: flat_store_byte v[16:17], v4 offset:1 +; ALIGNED-NEXT: s_cbranch_scc1 .LBB5_2 +; ALIGNED-NEXT: .LBB5_3: ; %Flow5 +; ALIGNED-NEXT: s_andn2_saveexec_b32 s8, s6 +; ALIGNED-NEXT: s_cbranch_execz .LBB5_6 +; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader +; ALIGNED-NEXT: s_movk_i32 s6, 0xff00 +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0x700 +; ALIGNED-NEXT: s_mov_b32 s7, -1 +; ALIGNED-NEXT: .LBB5_5: ; %memmove_bwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v24, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v25, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: flat_load_dwordx4 v[16:19], v[24:25] offset:240 +; ALIGNED-NEXT: flat_load_dwordx4 v[20:23], v[24:25] 
offset:224 +; ALIGNED-NEXT: flat_load_dwordx4 v[4:7], v[24:25] +; ALIGNED-NEXT: flat_load_dwordx4 v[8:11], v[24:25] offset:16 +; ALIGNED-NEXT: flat_load_dwordx4 v[12:15], v[24:25] offset:32 +; ALIGNED-NEXT: flat_load_dwordx4 v[112:115], v[24:25] offset:48 +; ALIGNED-NEXT: flat_load_dwordx4 v[116:119], v[24:25] offset:64 +; ALIGNED-NEXT: flat_load_dwordx4 v[40:43], v[24:25] offset:80 +; ALIGNED-NEXT: flat_load_dwordx4 v[26:29], v[24:25] offset:96 +; ALIGNED-NEXT: flat_load_dwordx4 v[32:35], v[24:25] offset:112 +; ALIGNED-NEXT: flat_load_dwordx4 v[44:47], v[24:25] offset:128 +; ALIGNED-NEXT: flat_load_dwordx4 v[52:55], v[24:25] offset:144 +; ALIGNED-NEXT: flat_load_dwordx4 v[66:69], v[24:25] offset:160 +; ALIGNED-NEXT: flat_load_dwordx4 v[81:84], v[24:25] offset:176 +; ALIGNED-NEXT: flat_load_dwordx4 v[96:99], v[24:25] offset:192 +; ALIGNED-NEXT: flat_load_dwordx4 v[100:103], v[24:25] offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) lgkmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:320 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:324 +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:328 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:332 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:332 +; ALIGNED-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:328 +; ALIGNED-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:324 +; ALIGNED-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:320 +; ALIGNED-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_add_u32 s4, s4, 0xffffff00 +; ALIGNED-NEXT: s_addc_u32 s5, s5, -1 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v31 offset:254 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:252 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v30 offset:250 +; 
ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:248 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v25 offset:246 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:244 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v24 offset:242 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:240 +; ALIGNED-NEXT: s_waitcnt lgkmcnt(22) +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:336 +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:340 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:344 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:348 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:348 +; ALIGNED-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:344 +; ALIGNED-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:340 +; ALIGNED-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:336 +; ALIGNED-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v51 offset:238 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:236 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v50 offset:234 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:232 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v49 offset:230 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:228 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v36 offset:226 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:224 +; ALIGNED-NEXT: s_waitcnt lgkmcnt(16) +; ALIGNED-NEXT: buffer_store_dword v100, off, s[0:3], s32 offset:288 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:292 +; ALIGNED-NEXT: buffer_store_dword v102, off, s[0:3], s32 offset:296 +; ALIGNED-NEXT: buffer_store_dword v103, off, s[0:3], s32 offset:300 +; 
ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:300 +; ALIGNED-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:296 +; ALIGNED-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:292 +; ALIGNED-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:288 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v30 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 8, v30 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v71 offset:222 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:220 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v70 offset:218 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:216 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v65 offset:214 +; ALIGNED-NEXT: flat_store_byte v[16:17], v65 offset:212 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v64 offset:210 +; ALIGNED-NEXT: flat_store_byte v[16:17], v64 offset:208 +; ALIGNED-NEXT: buffer_store_dword v96, off, s[0:3], s32 offset:304 +; ALIGNED-NEXT: buffer_store_dword v97, off, s[0:3], s32 offset:308 +; ALIGNED-NEXT: buffer_store_dword v98, off, s[0:3], s32 offset:312 +; ALIGNED-NEXT: buffer_store_dword v99, off, s[0:3], s32 offset:316 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:316 +; ALIGNED-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:312 +; ALIGNED-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:308 +; ALIGNED-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:304 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v87 offset:206 +; ALIGNED-NEXT: flat_store_byte v[16:17], v87 offset:204 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v86 offset:202 +; ALIGNED-NEXT: flat_store_byte 
v[16:17], v86 offset:200 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v85 offset:198 +; ALIGNED-NEXT: flat_store_byte v[16:17], v85 offset:196 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v80 offset:194 +; ALIGNED-NEXT: flat_store_byte v[16:17], v80 offset:192 +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:384 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:388 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:392 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:396 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v101, off, s[0:3], s32 offset:396 +; ALIGNED-NEXT: buffer_load_dword v99, off, s[0:3], s32 offset:392 +; ALIGNED-NEXT: buffer_load_dword v96, off, s[0:3], s32 offset:388 +; ALIGNED-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:384 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v101 offset:190 +; ALIGNED-NEXT: flat_store_byte v[16:17], v101 offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v99 offset:186 +; ALIGNED-NEXT: flat_store_byte v[16:17], v99 offset:184 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v96 offset:182 +; ALIGNED-NEXT: flat_store_byte v[16:17], v96 offset:180 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v81 offset:178 +; ALIGNED-NEXT: flat_store_byte v[16:17], v81 offset:176 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:400 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:404 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:408 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:412 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v100, off, s[0:3], s32 offset:412 +; ALIGNED-NEXT: buffer_load_dword v97, off, s[0:3], s32 
offset:408 +; ALIGNED-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:404 +; ALIGNED-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:400 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v100 offset:174 +; ALIGNED-NEXT: flat_store_byte v[16:17], v100 offset:172 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v97 offset:170 +; ALIGNED-NEXT: flat_store_byte v[16:17], v97 offset:168 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v82 offset:166 +; ALIGNED-NEXT: flat_store_byte v[16:17], v82 offset:164 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v66 offset:162 +; ALIGNED-NEXT: flat_store_byte v[16:17], v66 offset:160 +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:352 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:356 +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:360 +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:364 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v98, off, s[0:3], s32 offset:364 +; ALIGNED-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:360 +; ALIGNED-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:356 +; ALIGNED-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:352 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v98 offset:158 +; ALIGNED-NEXT: flat_store_byte v[16:17], v98 offset:156 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v83 offset:154 +; ALIGNED-NEXT: flat_store_byte v[16:17], v83 offset:152 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v67 offset:150 +; ALIGNED-NEXT: flat_store_byte v[16:17], v67 offset:148 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v52 offset:146 +; ALIGNED-NEXT: flat_store_byte v[16:17], v52 offset:144 
+; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:368 +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:372 +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:376 +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:380 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:380 +; ALIGNED-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:376 +; ALIGNED-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:372 +; ALIGNED-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:368 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v84 offset:142 +; ALIGNED-NEXT: flat_store_byte v[16:17], v84 offset:140 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v68 offset:138 +; ALIGNED-NEXT: flat_store_byte v[16:17], v68 offset:136 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v53 offset:134 +; ALIGNED-NEXT: flat_store_byte v[16:17], v53 offset:132 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v37 offset:130 +; ALIGNED-NEXT: flat_store_byte v[16:17], v37 offset:128 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:448 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:452 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:456 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:460 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:460 +; ALIGNED-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:456 +; ALIGNED-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:452 +; ALIGNED-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:448 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v69 offset:126 +; ALIGNED-NEXT: flat_store_byte v[16:17], v69 offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; 
ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v54 offset:122 +; ALIGNED-NEXT: flat_store_byte v[16:17], v54 offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v38 offset:118 +; ALIGNED-NEXT: flat_store_byte v[16:17], v38 offset:116 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v32 offset:114 +; ALIGNED-NEXT: flat_store_byte v[16:17], v32 offset:112 +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:464 +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:468 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:472 +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:476 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:476 +; ALIGNED-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:472 +; ALIGNED-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:468 +; ALIGNED-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:464 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v55 offset:110 +; ALIGNED-NEXT: flat_store_byte v[16:17], v55 offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v39 offset:106 +; ALIGNED-NEXT: flat_store_byte v[16:17], v39 offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v33 offset:102 +; ALIGNED-NEXT: flat_store_byte v[16:17], v33 offset:100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v26 offset:98 +; ALIGNED-NEXT: flat_store_byte v[16:17], v26 offset:96 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:416 +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:420 +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:424 +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:428 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v48, 
off, s[0:3], s32 offset:428 +; ALIGNED-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:424 +; ALIGNED-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:420 +; ALIGNED-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:416 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v48 offset:94 +; ALIGNED-NEXT: flat_store_byte v[16:17], v48 offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v34 offset:90 +; ALIGNED-NEXT: flat_store_byte v[16:17], v34 offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v27 offset:86 +; ALIGNED-NEXT: flat_store_byte v[16:17], v27 offset:84 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v21 offset:82 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:80 +; ALIGNED-NEXT: buffer_store_dword v116, off, s[0:3], s32 offset:432 +; ALIGNED-NEXT: buffer_store_dword v117, off, s[0:3], s32 offset:436 +; ALIGNED-NEXT: buffer_store_dword v118, off, s[0:3], s32 offset:440 +; ALIGNED-NEXT: buffer_store_dword v119, off, s[0:3], s32 offset:444 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:444 +; ALIGNED-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:440 +; ALIGNED-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:436 +; ALIGNED-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:432 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v35 offset:78 +; ALIGNED-NEXT: flat_store_byte v[16:17], v35 offset:76 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v28 offset:74 +; ALIGNED-NEXT: flat_store_byte v[16:17], v28 offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v22 offset:70 +; ALIGNED-NEXT: flat_store_byte v[16:17], v22 offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], 
v19 offset:66 +; ALIGNED-NEXT: flat_store_byte v[16:17], v19 offset:64 +; ALIGNED-NEXT: buffer_store_dword v112, off, s[0:3], s32 offset:512 +; ALIGNED-NEXT: buffer_store_dword v113, off, s[0:3], s32 offset:516 +; ALIGNED-NEXT: buffer_store_dword v114, off, s[0:3], s32 offset:520 +; ALIGNED-NEXT: buffer_store_dword v115, off, s[0:3], s32 offset:524 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:524 +; ALIGNED-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:520 +; ALIGNED-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:516 +; ALIGNED-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:512 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 8, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v50 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 8, v50 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v29 offset:62 +; ALIGNED-NEXT: flat_store_byte v[16:17], v29 offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v23 offset:58 +; ALIGNED-NEXT: flat_store_byte v[16:17], v23 offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v20 offset:54 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v18 offset:50 +; ALIGNED-NEXT: flat_store_byte v[16:17], v18 offset:48 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:528 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:532 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:536 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:540 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: 
buffer_load_dword v15, off, s[0:3], s32 offset:536 +; ALIGNED-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:540 +; ALIGNED-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:528 +; ALIGNED-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:532 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v15 offset:42 +; ALIGNED-NEXT: flat_store_byte v[16:17], v15 offset:40 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v14 offset:46 +; ALIGNED-NEXT: flat_store_byte v[16:17], v14 offset:44 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v13 offset:34 +; ALIGNED-NEXT: flat_store_byte v[16:17], v13 offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v12 offset:38 +; ALIGNED-NEXT: flat_store_byte v[16:17], v12 offset:36 +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:480 +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:484 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:488 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:492 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:492 +; ALIGNED-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:488 +; ALIGNED-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:484 +; ALIGNED-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:480 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v11 offset:30 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v10 offset:26 +; ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v9 offset:22 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
flat_store_byte_d16_hi v[16:17], v8 offset:18 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:16 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:496 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:500 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:504 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:508 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:508 +; ALIGNED-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:504 +; ALIGNED-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:500 +; ALIGNED-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:496 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v65 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v49 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v49 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:253 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v36 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 8, v36 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:251 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v71 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v71 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v65 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v64 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 8, v64 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v67 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v67 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:243 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v87 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v87 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 
v24, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 8, v86 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:239 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v85 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v85 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:237 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 8, v80 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:235 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v101 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v99 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v96 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 8, v96 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:229 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v81 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v100 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 8, v97 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v82 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 8, v82 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 8, v66 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v98 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v98 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:217 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v83 +; ALIGNED-NEXT: flat_store_byte v[16:17], v65 offset:213 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v65, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 8, v52 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:211 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v84 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 8, v84 +; ALIGNED-NEXT: flat_store_byte v[16:17], v64 offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v68 +; ALIGNED-NEXT: flat_store_byte v[16:17], v67 offset:149 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 24, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 8, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v68 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:207 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v53 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 8, v53 +; ALIGNED-NEXT: flat_store_byte v[16:17], v87 offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 24, v37 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v37 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:203 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v69 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v69 +; ALIGNED-NEXT: flat_store_byte v[16:17], v86 offset:201 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v54 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v54 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 8, v38 +; ALIGNED-NEXT: flat_store_byte v[16:17], v85 offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 24, v32 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 8, v32 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 8, v55 +; ALIGNED-NEXT: flat_store_byte v[16:17], v80 offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v39 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v39 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:191 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v33 +; ALIGNED-NEXT: flat_store_byte v[16:17], v101 offset:189 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v101, 24, v26 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v26, 8, v26 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:187 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 8, v48 +; ALIGNED-NEXT: flat_store_byte v[16:17], v99 offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v34 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 8, v34 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v27, 8, v27 +; ALIGNED-NEXT: flat_store_byte v[16:17], v96 offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 8, v21 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:179 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v35 +; ALIGNED-NEXT: flat_store_byte v[16:17], v81 offset:177 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 8, v28 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 8, v22 +; ALIGNED-NEXT: flat_store_byte v[16:17], v100 offset:173 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 8, v19 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v29 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v29 +; ALIGNED-NEXT: flat_store_byte v[16:17], v97 offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 24, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v23, 8, v23 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:167 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 8, v20 +; ALIGNED-NEXT: flat_store_byte v[16:17], v82 offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 8, v18 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:163 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v71, 24, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v15, 8, v15 +; ALIGNED-NEXT: flat_store_byte v[16:17], v66 offset:161 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v14 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v14, 8, v14 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:159 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v13 +; ALIGNED-NEXT: flat_store_byte v[16:17], v98 offset:157 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v12, 8, v12 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:155 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 8, v11 +; ALIGNED-NEXT: flat_store_byte v[16:17], v83 offset:153 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 24, v10 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 8, v10 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:151 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v9 +; ALIGNED-NEXT: flat_store_byte v[16:17], v65 offset:147 +; ALIGNED-NEXT: flat_store_byte v[16:17], v52 offset:145 +; ALIGNED-NEXT: flat_store_byte v[16:17], v25 offset:143 +; ALIGNED-NEXT: flat_store_byte v[16:17], v84 offset:141 +; ALIGNED-NEXT: flat_store_byte v[16:17], v64 offset:139 +; ALIGNED-NEXT: flat_store_byte v[16:17], v68 offset:137 +; ALIGNED-NEXT: flat_store_byte v[16:17], v113 offset:135 +; ALIGNED-NEXT: flat_store_byte v[16:17], v53 offset:133 +; ALIGNED-NEXT: flat_store_byte v[16:17], v87 offset:131 +; ALIGNED-NEXT: flat_store_byte v[16:17], v37 offset:129 +; ALIGNED-NEXT: flat_store_byte v[16:17], v24 offset:127 +; ALIGNED-NEXT: flat_store_byte v[16:17], v69 offset:125 +; ALIGNED-NEXT: flat_store_byte v[16:17], v86 offset:123 +; ALIGNED-NEXT: flat_store_byte v[16:17], v54 offset:121 +; ALIGNED-NEXT: flat_store_byte v[16:17], v114 offset:119 +; ALIGNED-NEXT: flat_store_byte v[16:17], v38 offset:117 +; ALIGNED-NEXT: flat_store_byte v[16:17], v85 offset:115 +; ALIGNED-NEXT: 
flat_store_byte v[16:17], v32 offset:113 +; ALIGNED-NEXT: flat_store_byte v[16:17], v51 offset:111 +; ALIGNED-NEXT: flat_store_byte v[16:17], v55 offset:109 +; ALIGNED-NEXT: flat_store_byte v[16:17], v80 offset:107 +; ALIGNED-NEXT: flat_store_byte v[16:17], v39 offset:105 +; ALIGNED-NEXT: flat_store_byte v[16:17], v115 offset:103 +; ALIGNED-NEXT: flat_store_byte v[16:17], v33 offset:101 +; ALIGNED-NEXT: flat_store_byte v[16:17], v101 offset:99 +; ALIGNED-NEXT: flat_store_byte v[16:17], v26 offset:97 +; ALIGNED-NEXT: flat_store_byte v[16:17], v50 offset:95 +; ALIGNED-NEXT: flat_store_byte v[16:17], v48 offset:93 +; ALIGNED-NEXT: flat_store_byte v[16:17], v99 offset:91 +; ALIGNED-NEXT: flat_store_byte v[16:17], v34 offset:89 +; ALIGNED-NEXT: flat_store_byte v[16:17], v102 offset:87 +; ALIGNED-NEXT: flat_store_byte v[16:17], v27 offset:85 +; ALIGNED-NEXT: flat_store_byte v[16:17], v96 offset:83 +; ALIGNED-NEXT: flat_store_byte v[16:17], v21 offset:81 +; ALIGNED-NEXT: flat_store_byte v[16:17], v49 offset:79 +; ALIGNED-NEXT: flat_store_byte v[16:17], v35 offset:77 +; ALIGNED-NEXT: flat_store_byte v[16:17], v81 offset:75 +; ALIGNED-NEXT: flat_store_byte v[16:17], v28 offset:73 +; ALIGNED-NEXT: flat_store_byte v[16:17], v31 offset:71 +; ALIGNED-NEXT: flat_store_byte v[16:17], v22 offset:69 +; ALIGNED-NEXT: flat_store_byte v[16:17], v100 offset:67 +; ALIGNED-NEXT: flat_store_byte v[16:17], v19 offset:65 +; ALIGNED-NEXT: flat_store_byte v[16:17], v36 offset:63 +; ALIGNED-NEXT: flat_store_byte v[16:17], v29 offset:61 +; ALIGNED-NEXT: flat_store_byte v[16:17], v97 offset:59 +; ALIGNED-NEXT: flat_store_byte v[16:17], v23 offset:57 +; ALIGNED-NEXT: flat_store_byte v[16:17], v103 offset:55 +; ALIGNED-NEXT: flat_store_byte v[16:17], v20 offset:53 +; ALIGNED-NEXT: flat_store_byte v[16:17], v82 offset:51 +; ALIGNED-NEXT: flat_store_byte v[16:17], v18 offset:49 +; ALIGNED-NEXT: flat_store_byte v[16:17], v71 offset:43 +; ALIGNED-NEXT: flat_store_byte v[16:17], v15 offset:41 +; 
ALIGNED-NEXT: flat_store_byte v[16:17], v66 offset:47 +; ALIGNED-NEXT: flat_store_byte v[16:17], v14 offset:45 +; ALIGNED-NEXT: flat_store_byte v[16:17], v30 offset:35 +; ALIGNED-NEXT: flat_store_byte v[16:17], v13 offset:33 +; ALIGNED-NEXT: flat_store_byte v[16:17], v98 offset:39 +; ALIGNED-NEXT: flat_store_byte v[16:17], v12 offset:37 +; ALIGNED-NEXT: flat_store_byte v[16:17], v70 offset:31 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:29 +; ALIGNED-NEXT: flat_store_byte v[16:17], v83 offset:27 +; ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:25 +; ALIGNED-NEXT: flat_store_byte v[16:17], v112 offset:23 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:21 +; ALIGNED-NEXT: flat_store_byte v[16:17], v67 offset:19 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v7 offset:14 +; ALIGNED-NEXT: flat_store_byte v[16:17], v7 offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v6 offset:10 +; ALIGNED-NEXT: flat_store_byte v[16:17], v6 offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v5 offset:6 +; ALIGNED-NEXT: flat_store_byte v[16:17], v5 offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte_d16_hi v[16:17], v4 offset:2 +; ALIGNED-NEXT: flat_store_byte v[16:17], v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 24, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v7, 8, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v6, 8, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 24, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v4, 8, v4 +; ALIGNED-NEXT: flat_store_byte v[16:17], v8 offset:15 +; ALIGNED-NEXT: flat_store_byte v[16:17], v7 offset:13 +; ALIGNED-NEXT: flat_store_byte v[16:17], v9 offset:11 +; ALIGNED-NEXT: flat_store_byte v[16:17], v6 offset:9 +; 
ALIGNED-NEXT: flat_store_byte v[16:17], v10 offset:7 +; ALIGNED-NEXT: flat_store_byte v[16:17], v5 offset:5 +; ALIGNED-NEXT: flat_store_byte v[16:17], v11 offset:3 +; ALIGNED-NEXT: flat_store_byte v[16:17], v4 offset:1 +; ALIGNED-NEXT: s_cbranch_scc0 .LBB5_5 +; ALIGNED-NEXT: .LBB5_6: ; %Flow6 +; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 +; ALIGNED-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memmove_p0_p0_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: s_mov_b32 s4, exec_lo +; UNROLL3-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; UNROLL3-NEXT: s_xor_b32 s6, exec_lo, s4 +; UNROLL3-NEXT: s_cbranch_execz .LBB5_4 +; UNROLL3-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB5_2: ; %memmove_fwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: flat_load_dwordx4 v[4:7], v[12:13] +; UNROLL3-NEXT: flat_load_dwordx4 v[8:11], v[12:13] offset:16 +; UNROLL3-NEXT: flat_load_dwordx4 v[12:15], v[12:13] offset:32 +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: 
s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[4:7] +; UNROLL3-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[8:11] offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[12:15] offset:32 +; UNROLL3-NEXT: s_cmp_lg_u64 s[4:5], 0x7e0 +; UNROLL3-NEXT: s_cbranch_scc1 .LBB5_2 +; UNROLL3-NEXT: ; %bb.3: ; %memmove_fwd_residual +; UNROLL3-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:2016 +; UNROLL3-NEXT: flat_load_dwordx4 v[2:5], v[2:3] offset:2032 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:2032 +; UNROLL3-NEXT: ; implicit-def: $vgpr2_vgpr3 +; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1 +; UNROLL3-NEXT: .LBB5_4: ; %Flow3 +; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6 +; UNROLL3-NEXT: s_cbranch_execz .LBB5_7 +; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual +; UNROLL3-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:2032 +; UNROLL3-NEXT: s_movk_i32 s6, 0xffd0 +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0x7b0 +; UNROLL3-NEXT: s_mov_b32 s7, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:2032 +; UNROLL3-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:2016 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB5_6: ; %memmove_bwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: flat_load_dwordx4 v[4:7], v[12:13] +; UNROLL3-NEXT: 
flat_load_dwordx4 v[8:11], v[12:13] offset:16 +; UNROLL3-NEXT: flat_load_dwordx4 v[12:15], v[12:13] offset:32 +; UNROLL3-NEXT: s_add_u32 s4, s4, 0xffffffd0 +; UNROLL3-NEXT: s_addc_u32 s5, s5, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[4:7] +; UNROLL3-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[8:11] offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[12:15] offset:32 +; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; UNROLL3-NEXT: s_cbranch_scc0 .LBB5_6 +; UNROLL3-NEXT: .LBB5_7: ; %Flow4 +; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; UNROLL3-NEXT: s_waitcnt lgkmcnt(0) +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + +define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { +; CHECK-LABEL: memmove_p1_p1_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s4, exec_lo +; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; CHECK-NEXT: s_xor_b32 s6, exec_lo, s4 +; CHECK-NEXT: s_cbranch_execz .LBB6_3 +; CHECK-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: .LBB6_2: ; %memmove_fwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_clause 0xf +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[96:97], off offset:224 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[96:97], off offset:240 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v[96:97], off offset:192 +; CHECK-NEXT: 
global_load_dwordx4 v[16:19], v[96:97], off offset:208 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v[96:97], off offset:160 +; CHECK-NEXT: global_load_dwordx4 v[24:27], v[96:97], off offset:176 +; CHECK-NEXT: global_load_dwordx4 v[28:31], v[96:97], off offset:128 +; CHECK-NEXT: global_load_dwordx4 v[32:35], v[96:97], off offset:144 +; CHECK-NEXT: global_load_dwordx4 v[36:39], v[96:97], off offset:96 +; CHECK-NEXT: global_load_dwordx4 v[48:51], v[96:97], off offset:112 +; CHECK-NEXT: global_load_dwordx4 v[52:55], v[96:97], off offset:64 +; CHECK-NEXT: global_load_dwordx4 v[64:67], v[96:97], off offset:80 +; CHECK-NEXT: global_load_dwordx4 v[68:71], v[96:97], off offset:32 +; CHECK-NEXT: global_load_dwordx4 v[80:83], v[96:97], off offset:48 +; CHECK-NEXT: global_load_dwordx4 v[84:87], v[96:97], off +; CHECK-NEXT: global_load_dwordx4 v[96:99], v[96:97], off offset:16 +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[4:7], off offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[8:11], off offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[12:15], off offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[16:19], off offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[20:23], off offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[24:27], off offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[28:31], off offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[32:35], off offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[36:39], off offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: global_store_dwordx4 v[100:101], 
v[48:51], off offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[52:55], off offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[64:67], off offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[68:71], off offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[80:83], off offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[84:87], off +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16 +; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800 +; CHECK-NEXT: s_cbranch_scc1 .LBB6_2 +; CHECK-NEXT: .LBB6_3: ; %Flow9 +; CHECK-NEXT: s_andn2_saveexec_b32 s8, s6 +; CHECK-NEXT: s_cbranch_execz .LBB6_6 +; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader +; CHECK-NEXT: s_movk_i32 s6, 0xff00 +; CHECK-NEXT: s_mov_b64 s[4:5], 0x700 +; CHECK-NEXT: s_mov_b32 s7, -1 +; CHECK-NEXT: .LBB6_5: ; %memmove_bwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_clause 0xf +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[96:97], off offset:224 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[96:97], off offset:240 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v[96:97], off offset:192 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v[96:97], off offset:208 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v[96:97], off offset:160 +; CHECK-NEXT: global_load_dwordx4 v[24:27], v[96:97], off offset:176 +; CHECK-NEXT: global_load_dwordx4 v[28:31], v[96:97], off offset:128 +; CHECK-NEXT: global_load_dwordx4 v[32:35], v[96:97], off offset:144 +; CHECK-NEXT: global_load_dwordx4 v[36:39], v[96:97], off offset:96 +; CHECK-NEXT: 
global_load_dwordx4 v[48:51], v[96:97], off offset:112 +; CHECK-NEXT: global_load_dwordx4 v[52:55], v[96:97], off offset:64 +; CHECK-NEXT: global_load_dwordx4 v[64:67], v[96:97], off offset:80 +; CHECK-NEXT: global_load_dwordx4 v[68:71], v[96:97], off offset:32 +; CHECK-NEXT: global_load_dwordx4 v[80:83], v[96:97], off offset:48 +; CHECK-NEXT: global_load_dwordx4 v[84:87], v[96:97], off +; CHECK-NEXT: global_load_dwordx4 v[96:99], v[96:97], off offset:16 +; CHECK-NEXT: s_add_u32 s4, s4, 0xffffff00 +; CHECK-NEXT: s_addc_u32 s5, s5, -1 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[4:7], off offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[8:11], off offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[12:15], off offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[16:19], off offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[20:23], off offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[24:27], off offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[28:31], off offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[32:35], off offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[36:39], off offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[48:51], off offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[52:55], off offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[64:67], off offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[68:71], off offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: global_store_dwordx4 
v[100:101], v[80:83], off offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[84:87], off +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16 +; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; CHECK-NEXT: s_cbranch_scc0 .LBB6_5 +; CHECK-NEXT: .LBB6_6: ; %Flow10 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memmove_p1_p1_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b32 s4, exec_lo +; ALIGNED-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; ALIGNED-NEXT: s_xor_b32 s6, exec_lo, s4 +; ALIGNED-NEXT: s_cbranch_execz .LBB6_3 +; ALIGNED-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: .LBB6_2: ; %memmove_fwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v20, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: global_load_dwordx4 v[16:19], v[20:21], off offset:240 +; ALIGNED-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:224 +; ALIGNED-NEXT: global_load_dwordx4 v[4:7], v[20:21], off +; 
ALIGNED-NEXT: global_load_dwordx4 v[8:11], v[20:21], off offset:16 +; ALIGNED-NEXT: global_load_dwordx4 v[12:15], v[20:21], off offset:32 +; ALIGNED-NEXT: global_load_dwordx4 v[98:101], v[20:21], off offset:48 +; ALIGNED-NEXT: global_load_dwordx4 v[112:115], v[20:21], off offset:64 +; ALIGNED-NEXT: global_load_dwordx4 v[82:85], v[20:21], off offset:80 +; ALIGNED-NEXT: global_load_dwordx4 v[116:119], v[20:21], off offset:96 +; ALIGNED-NEXT: global_load_dwordx4 v[66:69], v[20:21], off offset:112 +; ALIGNED-NEXT: global_load_dwordx4 v[40:43], v[20:21], off offset:128 +; ALIGNED-NEXT: global_load_dwordx4 v[50:53], v[20:21], off offset:144 +; ALIGNED-NEXT: global_load_dwordx4 v[44:47], v[20:21], off offset:160 +; ALIGNED-NEXT: global_load_dwordx4 v[34:37], v[20:21], off offset:176 +; ALIGNED-NEXT: global_load_dwordx4 v[30:33], v[20:21], off offset:192 +; ALIGNED-NEXT: global_load_dwordx4 v[26:29], v[20:21], off offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v21, off offset:254 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:252 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v20, 
off offset:250 +; ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:248 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v19, off offset:246 +; ALIGNED-NEXT: global_store_byte v[16:17], v19, off offset:244 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v18, off offset:242 +; ALIGNED-NEXT: global_store_byte v[16:17], v18, off offset:240 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 8, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 8, v20 +; ALIGNED-NEXT: s_cmp_lg_u64 s[4:5], 0x800 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v25, off offset:238 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:236 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v24, off offset:234 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:232 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v23, off offset:230 +; ALIGNED-NEXT: global_store_byte v[16:17], v23, off offset:228 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v22, off offset:226 +; ALIGNED-NEXT: global_store_byte v[16:17], v22, off offset:224 +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:32 +; 
ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v29, off offset:222 +; ALIGNED-NEXT: global_store_byte v[16:17], v29, off offset:220 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v28, off offset:218 +; ALIGNED-NEXT: global_store_byte v[16:17], v28, off offset:216 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v27, off offset:214 +; ALIGNED-NEXT: global_store_byte v[16:17], v27, off offset:212 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v26, off offset:210 +; ALIGNED-NEXT: global_store_byte v[16:17], v26, off offset:208 +; ALIGNED-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v33, off offset:206 +; ALIGNED-NEXT: global_store_byte v[16:17], v33, off offset:204 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: 
global_store_byte_d16_hi v[16:17], v32, off offset:202 +; ALIGNED-NEXT: global_store_byte v[16:17], v32, off offset:200 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v31, off offset:198 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:196 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v30, off offset:194 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:192 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v37, off offset:190 +; ALIGNED-NEXT: global_store_byte v[16:17], v37, off offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v36, off offset:186 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:184 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v35, off offset:182 +; ALIGNED-NEXT: global_store_byte v[16:17], v35, off offset:180 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v34, off offset:178 +; ALIGNED-NEXT: global_store_byte v[16:17], v34, off offset:176 +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], 
s32 offset:156 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v49, off offset:174 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:172 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v48, off offset:170 +; ALIGNED-NEXT: global_store_byte v[16:17], v48, off offset:168 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v39, off offset:166 +; ALIGNED-NEXT: global_store_byte v[16:17], v39, off offset:164 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v38, off offset:162 +; ALIGNED-NEXT: global_store_byte v[16:17], v38, off offset:160 +; ALIGNED-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v53, off offset:158 +; ALIGNED-NEXT: global_store_byte v[16:17], v53, off offset:156 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v52, off offset:154 +; ALIGNED-NEXT: global_store_byte v[16:17], v52, off offset:152 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: 
global_store_byte_d16_hi v[16:17], v51, off offset:150 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:148 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v50, off offset:146 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:144 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v65, off offset:142 +; ALIGNED-NEXT: global_store_byte v[16:17], v65, off offset:140 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v64, off offset:138 +; ALIGNED-NEXT: global_store_byte v[16:17], v64, off offset:136 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v55, off offset:134 +; ALIGNED-NEXT: global_store_byte v[16:17], v55, off offset:132 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v54, off offset:130 +; ALIGNED-NEXT: global_store_byte v[16:17], v54, off offset:128 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: buffer_load_dword v68, off, s[0:3], s32 
offset:200 +; ALIGNED-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v69, off offset:126 +; ALIGNED-NEXT: global_store_byte v[16:17], v69, off offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v68, off offset:122 +; ALIGNED-NEXT: global_store_byte v[16:17], v68, off offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v67, off offset:118 +; ALIGNED-NEXT: global_store_byte v[16:17], v67, off offset:116 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v66, off offset:114 +; ALIGNED-NEXT: global_store_byte v[16:17], v66, off offset:112 +; ALIGNED-NEXT: buffer_store_dword v116, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: buffer_store_dword v117, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_store_dword v118, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_store_dword v119, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v81, off offset:110 +; ALIGNED-NEXT: global_store_byte v[16:17], v81, off offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v80, off offset:106 +; ALIGNED-NEXT: global_store_byte v[16:17], v80, off offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v71, off offset:102 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
global_store_byte_d16_hi v[16:17], v70, off offset:98 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:96 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_store_dword v85, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v85, off offset:94 +; ALIGNED-NEXT: global_store_byte v[16:17], v85, off offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v84, off offset:90 +; ALIGNED-NEXT: global_store_byte v[16:17], v84, off offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v83, off offset:86 +; ALIGNED-NEXT: global_store_byte v[16:17], v83, off offset:84 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v82, off offset:82 +; ALIGNED-NEXT: global_store_byte v[16:17], v82, off offset:80 +; ALIGNED-NEXT: buffer_store_dword v112, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: buffer_store_dword v113, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_store_dword v114, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_store_dword v115, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v97, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: buffer_load_dword v96, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: v_lshrrev_b32_e32 
v112, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 8, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 8, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 8, v24 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v97, off offset:78 +; ALIGNED-NEXT: global_store_byte v[16:17], v97, off offset:76 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v96, off offset:74 +; ALIGNED-NEXT: global_store_byte v[16:17], v96, off offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v87, off offset:70 +; ALIGNED-NEXT: global_store_byte v[16:17], v87, off offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v86, off offset:66 +; ALIGNED-NEXT: global_store_byte v[16:17], v86, off offset:64 +; ALIGNED-NEXT: buffer_store_dword v98, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: buffer_store_dword v99, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_store_dword v100, off, s[0:3], s32 offset:264 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v101, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: buffer_load_dword v100, off, s[0:3], s32 offset:264 +; ALIGNED-NEXT: buffer_load_dword v99, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_load_dword v98, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v101, off offset:62 +; ALIGNED-NEXT: global_store_byte v[16:17], v101, off offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v100, off offset:58 +; ALIGNED-NEXT: global_store_byte v[16:17], v100, off offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; 
ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v99, off offset:54 +; ALIGNED-NEXT: global_store_byte v[16:17], v99, off offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v98, off offset:50 +; ALIGNED-NEXT: global_store_byte v[16:17], v98, off offset:48 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v15, off offset:42 +; ALIGNED-NEXT: global_store_byte v[16:17], v15, off offset:40 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v14, off offset:46 +; ALIGNED-NEXT: global_store_byte v[16:17], v14, off offset:44 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v13, off offset:34 +; ALIGNED-NEXT: global_store_byte v[16:17], v13, off offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v12, off offset:38 +; ALIGNED-NEXT: global_store_byte v[16:17], v12, off offset:36 +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: buffer_load_dword v10, off, s[0:3], s32 
offset:232 +; ALIGNED-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v11, off offset:30 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v10, off offset:26 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v9, off offset:22 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v8, off offset:18 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:16 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v27 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v23, 8, v23 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:253 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 24, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 8, v22 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:251 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v29 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v29 +; 
ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 8, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v27, 8, v27 +; ALIGNED-NEXT: global_store_byte v[16:17], v19, off offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 24, v26 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v26, 8, v26 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v51 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:243 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v33 +; ALIGNED-NEXT: global_store_byte v[16:17], v18, off offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 24, v32 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 8, v32 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:239 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v31 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:237 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v30 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 8, v30 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:235 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v37 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v37 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v36 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 8, v36 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v35 +; ALIGNED-NEXT: global_store_byte v[16:17], v23, off offset:229 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v23, 24, v34 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 8, v34 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 24, v49 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v49 +; ALIGNED-NEXT: 
global_store_byte v[16:17], v22, off offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 8, v48 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v39 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v39 +; ALIGNED-NEXT: global_store_byte v[16:17], v29, off offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 8, v38 +; ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 24, v53 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 8, v53 +; ALIGNED-NEXT: global_store_byte v[16:17], v28, off offset:217 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 8, v52 +; ALIGNED-NEXT: global_store_byte v[16:17], v27, off offset:213 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v27, 24, v50 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 8, v50 +; ALIGNED-NEXT: global_store_byte v[16:17], v19, off offset:211 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 24, v65 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v65 +; ALIGNED-NEXT: global_store_byte v[16:17], v26, off offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v26, 24, v64 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:149 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 8, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 8, v64 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:207 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 8, v55 +; ALIGNED-NEXT: global_store_byte v[16:17], v33, off offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 24, v54 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v54 +; ALIGNED-NEXT: global_store_byte v[16:17], v18, off offset:203 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 24, v69 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v69 +; ALIGNED-NEXT: global_store_byte v[16:17], v32, off offset:201 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v32, 24, v68 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v68 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v67 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v67 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 8, v66 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v81 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 8, v80 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:191 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v71 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v71 +; ALIGNED-NEXT: global_store_byte v[16:17], v37, off offset:189 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 24, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v70 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:187 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v85 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v85 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v84 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 8, v84 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v83 +; ALIGNED-NEXT: global_store_byte v[16:17], v35, off offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 24, v82 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 8, v82 +; ALIGNED-NEXT: global_store_byte v[16:17], v23, off offset:179 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v23, 24, v97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 8, v97 +; ALIGNED-NEXT: global_store_byte v[16:17], v34, off offset:177 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 24, v96 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 8, 
v96 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 24, v87 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v87 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:173 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 8, v86 +; ALIGNED-NEXT: global_store_byte v[16:17], v22, off offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 24, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v101 +; ALIGNED-NEXT: global_store_byte v[16:17], v48, off offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 24, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v100 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:167 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v99 +; ALIGNED-NEXT: global_store_byte v[16:17], v39, off offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 24, v98 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v98 +; ALIGNED-NEXT: global_store_byte v[16:17], v29, off offset:163 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 24, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v15, 8, v15 +; ALIGNED-NEXT: global_store_byte v[16:17], v38, off offset:161 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 24, v14 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v14, 8, v14 +; ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:159 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v13 +; ALIGNED-NEXT: global_store_byte v[16:17], v53, off offset:157 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v12, 8, v12 +; ALIGNED-NEXT: global_store_byte v[16:17], v28, off offset:155 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 24, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 8, v11 +; ALIGNED-NEXT: global_store_byte v[16:17], v52, off offset:153 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 24, v10 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 8, v10 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:151 
+; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v9 +; ALIGNED-NEXT: global_store_byte v[16:17], v27, off offset:147 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:145 +; ALIGNED-NEXT: global_store_byte v[16:17], v19, off offset:143 +; ALIGNED-NEXT: global_store_byte v[16:17], v65, off offset:141 +; ALIGNED-NEXT: global_store_byte v[16:17], v26, off offset:139 +; ALIGNED-NEXT: global_store_byte v[16:17], v64, off offset:137 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:135 +; ALIGNED-NEXT: global_store_byte v[16:17], v55, off offset:133 +; ALIGNED-NEXT: global_store_byte v[16:17], v33, off offset:131 +; ALIGNED-NEXT: global_store_byte v[16:17], v54, off offset:129 +; ALIGNED-NEXT: global_store_byte v[16:17], v18, off offset:127 +; ALIGNED-NEXT: global_store_byte v[16:17], v69, off offset:125 +; ALIGNED-NEXT: global_store_byte v[16:17], v32, off offset:123 +; ALIGNED-NEXT: global_store_byte v[16:17], v68, off offset:121 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:119 +; ALIGNED-NEXT: global_store_byte v[16:17], v67, off offset:117 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:115 +; ALIGNED-NEXT: global_store_byte v[16:17], v66, off offset:113 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:111 +; ALIGNED-NEXT: global_store_byte v[16:17], v81, off offset:109 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:107 +; ALIGNED-NEXT: global_store_byte v[16:17], v80, off offset:105 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:103 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:101 +; ALIGNED-NEXT: global_store_byte v[16:17], v37, off offset:99 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:97 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:95 +; ALIGNED-NEXT: global_store_byte v[16:17], v85, off offset:93 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:91 +; ALIGNED-NEXT: 
global_store_byte v[16:17], v84, off offset:89 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:87 +; ALIGNED-NEXT: global_store_byte v[16:17], v83, off offset:85 +; ALIGNED-NEXT: global_store_byte v[16:17], v35, off offset:83 +; ALIGNED-NEXT: global_store_byte v[16:17], v82, off offset:81 +; ALIGNED-NEXT: global_store_byte v[16:17], v23, off offset:79 +; ALIGNED-NEXT: global_store_byte v[16:17], v97, off offset:77 +; ALIGNED-NEXT: global_store_byte v[16:17], v34, off offset:75 +; ALIGNED-NEXT: global_store_byte v[16:17], v96, off offset:73 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:71 +; ALIGNED-NEXT: global_store_byte v[16:17], v87, off offset:69 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:67 +; ALIGNED-NEXT: global_store_byte v[16:17], v86, off offset:65 +; ALIGNED-NEXT: global_store_byte v[16:17], v22, off offset:63 +; ALIGNED-NEXT: global_store_byte v[16:17], v101, off offset:61 +; ALIGNED-NEXT: global_store_byte v[16:17], v48, off offset:59 +; ALIGNED-NEXT: global_store_byte v[16:17], v100, off offset:57 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:55 +; ALIGNED-NEXT: global_store_byte v[16:17], v99, off offset:53 +; ALIGNED-NEXT: global_store_byte v[16:17], v39, off offset:51 +; ALIGNED-NEXT: global_store_byte v[16:17], v98, off offset:49 +; ALIGNED-NEXT: global_store_byte v[16:17], v29, off offset:43 +; ALIGNED-NEXT: global_store_byte v[16:17], v15, off offset:41 +; ALIGNED-NEXT: global_store_byte v[16:17], v38, off offset:47 +; ALIGNED-NEXT: global_store_byte v[16:17], v14, off offset:45 +; ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:35 +; ALIGNED-NEXT: global_store_byte v[16:17], v13, off offset:33 +; ALIGNED-NEXT: global_store_byte v[16:17], v53, off offset:39 +; ALIGNED-NEXT: global_store_byte v[16:17], v12, off offset:37 +; ALIGNED-NEXT: global_store_byte v[16:17], v28, off offset:31 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:29 +; ALIGNED-NEXT: 
global_store_byte v[16:17], v52, off offset:27 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:25 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:23 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:21 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:19 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v7, off offset:14 +; ALIGNED-NEXT: global_store_byte v[16:17], v7, off offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v6, off offset:10 +; ALIGNED-NEXT: global_store_byte v[16:17], v6, off offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v5, off offset:6 +; ALIGNED-NEXT: global_store_byte v[16:17], v5, off offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v4, off offset:2 +; ALIGNED-NEXT: global_store_byte v[16:17], v4, off +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 24, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v7, 8, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v6, 8, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 24, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v4, 8, v4 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:15 +; ALIGNED-NEXT: global_store_byte v[16:17], v7, off offset:13 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:11 +; ALIGNED-NEXT: global_store_byte v[16:17], v6, off offset:9 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:7 +; ALIGNED-NEXT: global_store_byte v[16:17], v5, off offset:5 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3 +; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1 +; ALIGNED-NEXT: s_cbranch_scc1 .LBB6_2 +; ALIGNED-NEXT: .LBB6_3: ; %Flow9 +; ALIGNED-NEXT: 
s_andn2_saveexec_b32 s8, s6 +; ALIGNED-NEXT: s_cbranch_execz .LBB6_6 +; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader +; ALIGNED-NEXT: s_movk_i32 s6, 0xff00 +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0x700 +; ALIGNED-NEXT: s_mov_b32 s7, -1 +; ALIGNED-NEXT: .LBB6_5: ; %memmove_bwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v24, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v25, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: global_load_dwordx4 v[16:19], v[24:25], off offset:240 +; ALIGNED-NEXT: global_load_dwordx4 v[20:23], v[24:25], off offset:224 +; ALIGNED-NEXT: global_load_dwordx4 v[4:7], v[24:25], off +; ALIGNED-NEXT: global_load_dwordx4 v[8:11], v[24:25], off offset:16 +; ALIGNED-NEXT: global_load_dwordx4 v[12:15], v[24:25], off offset:32 +; ALIGNED-NEXT: global_load_dwordx4 v[112:115], v[24:25], off offset:48 +; ALIGNED-NEXT: global_load_dwordx4 v[116:119], v[24:25], off offset:64 +; ALIGNED-NEXT: global_load_dwordx4 v[40:43], v[24:25], off offset:80 +; ALIGNED-NEXT: global_load_dwordx4 v[26:29], v[24:25], off offset:96 +; ALIGNED-NEXT: global_load_dwordx4 v[32:35], v[24:25], off offset:112 +; ALIGNED-NEXT: global_load_dwordx4 v[44:47], v[24:25], off offset:128 +; ALIGNED-NEXT: global_load_dwordx4 v[52:55], v[24:25], off offset:144 +; ALIGNED-NEXT: global_load_dwordx4 v[66:69], v[24:25], off offset:160 +; ALIGNED-NEXT: global_load_dwordx4 v[81:84], v[24:25], off offset:176 +; ALIGNED-NEXT: global_load_dwordx4 v[96:99], v[24:25], off offset:192 +; ALIGNED-NEXT: global_load_dwordx4 v[100:103], v[24:25], off offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:320 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:324 +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:328 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:332 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword 
v31, off, s[0:3], s32 offset:332 +; ALIGNED-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:328 +; ALIGNED-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:324 +; ALIGNED-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:320 +; ALIGNED-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_add_u32 s4, s4, 0xffffff00 +; ALIGNED-NEXT: s_addc_u32 s5, s5, -1 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v31, off offset:254 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:252 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v30, off offset:250 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:248 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v25, off offset:246 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:244 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v24, off offset:242 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:240 +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:336 +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:340 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:344 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:348 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v51, off, s[0:3], s32 offset:348 +; ALIGNED-NEXT: buffer_load_dword v50, off, s[0:3], s32 offset:344 +; ALIGNED-NEXT: buffer_load_dword v49, off, s[0:3], s32 offset:340 +; ALIGNED-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:336 +; ALIGNED-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v51, off offset:238 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:236 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: 
global_store_byte_d16_hi v[16:17], v50, off offset:234 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:232 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v49, off offset:230 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:228 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v36, off offset:226 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:224 +; ALIGNED-NEXT: buffer_store_dword v100, off, s[0:3], s32 offset:288 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:292 +; ALIGNED-NEXT: buffer_store_dword v102, off, s[0:3], s32 offset:296 +; ALIGNED-NEXT: buffer_store_dword v103, off, s[0:3], s32 offset:300 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v71, off, s[0:3], s32 offset:300 +; ALIGNED-NEXT: buffer_load_dword v70, off, s[0:3], s32 offset:296 +; ALIGNED-NEXT: buffer_load_dword v65, off, s[0:3], s32 offset:292 +; ALIGNED-NEXT: buffer_load_dword v64, off, s[0:3], s32 offset:288 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v31 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v30 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 8, v30 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v71, off offset:222 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:220 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v70, off offset:218 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:216 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v65, off offset:214 +; ALIGNED-NEXT: global_store_byte v[16:17], v65, off offset:212 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v64, off offset:210 +; ALIGNED-NEXT: global_store_byte v[16:17], v64, off offset:208 +; ALIGNED-NEXT: buffer_store_dword v96, off, s[0:3], s32 
offset:304 +; ALIGNED-NEXT: buffer_store_dword v97, off, s[0:3], s32 offset:308 +; ALIGNED-NEXT: buffer_store_dword v98, off, s[0:3], s32 offset:312 +; ALIGNED-NEXT: buffer_store_dword v99, off, s[0:3], s32 offset:316 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v87, off, s[0:3], s32 offset:316 +; ALIGNED-NEXT: buffer_load_dword v86, off, s[0:3], s32 offset:312 +; ALIGNED-NEXT: buffer_load_dword v85, off, s[0:3], s32 offset:308 +; ALIGNED-NEXT: buffer_load_dword v80, off, s[0:3], s32 offset:304 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v87, off offset:206 +; ALIGNED-NEXT: global_store_byte v[16:17], v87, off offset:204 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v86, off offset:202 +; ALIGNED-NEXT: global_store_byte v[16:17], v86, off offset:200 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v85, off offset:198 +; ALIGNED-NEXT: global_store_byte v[16:17], v85, off offset:196 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v80, off offset:194 +; ALIGNED-NEXT: global_store_byte v[16:17], v80, off offset:192 +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:384 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:388 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:392 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:396 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v101, off, s[0:3], s32 offset:396 +; ALIGNED-NEXT: buffer_load_dword v99, off, s[0:3], s32 offset:392 +; ALIGNED-NEXT: buffer_load_dword v96, off, s[0:3], s32 offset:388 +; ALIGNED-NEXT: buffer_load_dword v81, off, s[0:3], s32 offset:384 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v101, off offset:190 +; ALIGNED-NEXT: global_store_byte v[16:17], v101, off offset:188 +; ALIGNED-NEXT: s_waitcnt 
vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v99, off offset:186 +; ALIGNED-NEXT: global_store_byte v[16:17], v99, off offset:184 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v96, off offset:182 +; ALIGNED-NEXT: global_store_byte v[16:17], v96, off offset:180 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v81, off offset:178 +; ALIGNED-NEXT: global_store_byte v[16:17], v81, off offset:176 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:400 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:404 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:408 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:412 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v100, off, s[0:3], s32 offset:412 +; ALIGNED-NEXT: buffer_load_dword v97, off, s[0:3], s32 offset:408 +; ALIGNED-NEXT: buffer_load_dword v82, off, s[0:3], s32 offset:404 +; ALIGNED-NEXT: buffer_load_dword v66, off, s[0:3], s32 offset:400 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v100, off offset:174 +; ALIGNED-NEXT: global_store_byte v[16:17], v100, off offset:172 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v97, off offset:170 +; ALIGNED-NEXT: global_store_byte v[16:17], v97, off offset:168 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v82, off offset:166 +; ALIGNED-NEXT: global_store_byte v[16:17], v82, off offset:164 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v66, off offset:162 +; ALIGNED-NEXT: global_store_byte v[16:17], v66, off offset:160 +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:352 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:356 +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:360 +; ALIGNED-NEXT: 
buffer_store_dword v55, off, s[0:3], s32 offset:364 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v98, off, s[0:3], s32 offset:364 +; ALIGNED-NEXT: buffer_load_dword v83, off, s[0:3], s32 offset:360 +; ALIGNED-NEXT: buffer_load_dword v67, off, s[0:3], s32 offset:356 +; ALIGNED-NEXT: buffer_load_dword v52, off, s[0:3], s32 offset:352 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v98, off offset:158 +; ALIGNED-NEXT: global_store_byte v[16:17], v98, off offset:156 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v83, off offset:154 +; ALIGNED-NEXT: global_store_byte v[16:17], v83, off offset:152 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v67, off offset:150 +; ALIGNED-NEXT: global_store_byte v[16:17], v67, off offset:148 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v52, off offset:146 +; ALIGNED-NEXT: global_store_byte v[16:17], v52, off offset:144 +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:368 +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:372 +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:376 +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:380 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v84, off, s[0:3], s32 offset:380 +; ALIGNED-NEXT: buffer_load_dword v68, off, s[0:3], s32 offset:376 +; ALIGNED-NEXT: buffer_load_dword v53, off, s[0:3], s32 offset:372 +; ALIGNED-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:368 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v84, off offset:142 +; ALIGNED-NEXT: global_store_byte v[16:17], v84, off offset:140 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v68, off offset:138 +; ALIGNED-NEXT: global_store_byte v[16:17], v68, off offset:136 +; ALIGNED-NEXT: s_waitcnt 
vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v53, off offset:134 +; ALIGNED-NEXT: global_store_byte v[16:17], v53, off offset:132 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v37, off offset:130 +; ALIGNED-NEXT: global_store_byte v[16:17], v37, off offset:128 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:448 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:452 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:456 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:460 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v69, off, s[0:3], s32 offset:460 +; ALIGNED-NEXT: buffer_load_dword v54, off, s[0:3], s32 offset:456 +; ALIGNED-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:452 +; ALIGNED-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:448 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v69, off offset:126 +; ALIGNED-NEXT: global_store_byte v[16:17], v69, off offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v54, off offset:122 +; ALIGNED-NEXT: global_store_byte v[16:17], v54, off offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v38, off offset:118 +; ALIGNED-NEXT: global_store_byte v[16:17], v38, off offset:116 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v32, off offset:114 +; ALIGNED-NEXT: global_store_byte v[16:17], v32, off offset:112 +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:464 +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:468 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:472 +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:476 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v55, off, s[0:3], s32 offset:476 +; ALIGNED-NEXT: buffer_load_dword 
v39, off, s[0:3], s32 offset:472 +; ALIGNED-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:468 +; ALIGNED-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:464 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v55, off offset:110 +; ALIGNED-NEXT: global_store_byte v[16:17], v55, off offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v39, off offset:106 +; ALIGNED-NEXT: global_store_byte v[16:17], v39, off offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v33, off offset:102 +; ALIGNED-NEXT: global_store_byte v[16:17], v33, off offset:100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v26, off offset:98 +; ALIGNED-NEXT: global_store_byte v[16:17], v26, off offset:96 +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:416 +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:420 +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:424 +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:428 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v48, off, s[0:3], s32 offset:428 +; ALIGNED-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:424 +; ALIGNED-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:420 +; ALIGNED-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:416 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v48, off offset:94 +; ALIGNED-NEXT: global_store_byte v[16:17], v48, off offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v34, off offset:90 +; ALIGNED-NEXT: global_store_byte v[16:17], v34, off offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v27, off offset:86 +; ALIGNED-NEXT: global_store_byte v[16:17], v27, off offset:84 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v21, off offset:82 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:80 +; ALIGNED-NEXT: buffer_store_dword v116, off, s[0:3], s32 offset:432 +; ALIGNED-NEXT: buffer_store_dword v117, off, s[0:3], s32 offset:436 +; ALIGNED-NEXT: buffer_store_dword v118, off, s[0:3], s32 offset:440 +; ALIGNED-NEXT: buffer_store_dword v119, off, s[0:3], s32 offset:444 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:444 +; ALIGNED-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:440 +; ALIGNED-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:436 +; ALIGNED-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:432 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v35, off offset:78 +; ALIGNED-NEXT: global_store_byte v[16:17], v35, off offset:76 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v28, off offset:74 +; ALIGNED-NEXT: global_store_byte v[16:17], v28, off offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v22, off offset:70 +; ALIGNED-NEXT: global_store_byte v[16:17], v22, off offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v19, off offset:66 +; ALIGNED-NEXT: global_store_byte v[16:17], v19, off offset:64 +; ALIGNED-NEXT: buffer_store_dword v112, off, s[0:3], s32 offset:512 +; ALIGNED-NEXT: buffer_store_dword v113, off, s[0:3], s32 offset:516 +; ALIGNED-NEXT: buffer_store_dword v114, off, s[0:3], s32 offset:520 +; ALIGNED-NEXT: buffer_store_dword v115, off, s[0:3], s32 offset:524 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:524 +; ALIGNED-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:520 +; ALIGNED-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:516 +; ALIGNED-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:512 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v112, 24, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 8, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v51 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v50 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 8, v50 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v29, off offset:62 +; ALIGNED-NEXT: global_store_byte v[16:17], v29, off offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v23, off offset:58 +; ALIGNED-NEXT: global_store_byte v[16:17], v23, off offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v20, off offset:54 +; ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v18, off offset:50 +; ALIGNED-NEXT: global_store_byte v[16:17], v18, off offset:48 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:528 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:532 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:536 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:540 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:536 +; ALIGNED-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:540 +; ALIGNED-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:528 +; ALIGNED-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:532 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v15, off offset:42 +; ALIGNED-NEXT: global_store_byte v[16:17], v15, off offset:40 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v14, off offset:46 +; ALIGNED-NEXT: global_store_byte v[16:17], v14, off offset:44 +; ALIGNED-NEXT: s_waitcnt 
vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v13, off offset:34 +; ALIGNED-NEXT: global_store_byte v[16:17], v13, off offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v12, off offset:38 +; ALIGNED-NEXT: global_store_byte v[16:17], v12, off offset:36 +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:480 +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:484 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:488 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:492 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:492 +; ALIGNED-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:488 +; ALIGNED-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:484 +; ALIGNED-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:480 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v11, off offset:30 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v10, off offset:26 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v9, off offset:22 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v8, off offset:18 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:16 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:496 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:500 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:504 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:508 +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:508 +; ALIGNED-NEXT: buffer_load_dword v6, off, s[0:3], s32 
offset:504 +; ALIGNED-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:500 +; ALIGNED-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:496 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v65 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v49 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v49 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:253 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v36 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 8, v36 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:251 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v71 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v71 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v70 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v65 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v64 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 8, v64 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v67 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v67 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:243 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v87 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v87 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 8, v86 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:239 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v85 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v85 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:237 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 8, v80 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:235 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v115, 24, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v101 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v99 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v96 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 8, v96 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:229 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v81 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v100 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 8, v97 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v82 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 8, v82 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 8, v66 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v98 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v98 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:217 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v83 +; ALIGNED-NEXT: global_store_byte v[16:17], v65, off offset:213 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 8, v52 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:211 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 24, v84 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 8, v84 +; ALIGNED-NEXT: global_store_byte v[16:17], v64, off offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v68 +; ALIGNED-NEXT: global_store_byte 
v[16:17], v67, off offset:149 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 24, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 8, v8 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v68 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:207 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v53 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 8, v53 +; ALIGNED-NEXT: global_store_byte v[16:17], v87, off offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 24, v37 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v37 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:203 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v69 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v69 +; ALIGNED-NEXT: global_store_byte v[16:17], v86, off offset:201 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v54 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v54 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 8, v38 +; ALIGNED-NEXT: global_store_byte v[16:17], v85, off offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 24, v32 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 8, v32 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 24, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 8, v55 +; ALIGNED-NEXT: global_store_byte v[16:17], v80, off offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v39 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v39 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:191 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v33 +; ALIGNED-NEXT: global_store_byte v[16:17], v101, off offset:189 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v26 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v26, 8, v26 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:187 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 8, v48 +; ALIGNED-NEXT: global_store_byte v[16:17], v99, off 
offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v34 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 8, v34 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v27, 8, v27 +; ALIGNED-NEXT: global_store_byte v[16:17], v96, off offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v96, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v21, 8, v21 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:179 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v35 +; ALIGNED-NEXT: global_store_byte v[16:17], v81, off offset:177 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 8, v28 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 24, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v22, 8, v22 +; ALIGNED-NEXT: global_store_byte v[16:17], v100, off offset:173 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v19, 8, v19 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v29 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v29 +; ALIGNED-NEXT: global_store_byte v[16:17], v97, off offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v97, 24, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v23, 8, v23 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:167 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v20, 8, v20 +; ALIGNED-NEXT: global_store_byte v[16:17], v82, off offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v18, 8, v18 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:163 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v15, 8, v15 +; ALIGNED-NEXT: global_store_byte v[16:17], v66, off offset:161 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v14 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v14, 8, v14 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:159 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v13, 8, v13 +; ALIGNED-NEXT: global_store_byte v[16:17], v98, off offset:157 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v12, 8, v12 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:155 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 8, v11 +; ALIGNED-NEXT: global_store_byte v[16:17], v83, off offset:153 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 24, v10 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 8, v10 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:151 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 8, v9 +; ALIGNED-NEXT: global_store_byte v[16:17], v65, off offset:147 +; ALIGNED-NEXT: global_store_byte v[16:17], v52, off offset:145 +; ALIGNED-NEXT: global_store_byte v[16:17], v25, off offset:143 +; ALIGNED-NEXT: global_store_byte v[16:17], v84, off offset:141 +; ALIGNED-NEXT: global_store_byte v[16:17], v64, off offset:139 +; ALIGNED-NEXT: global_store_byte v[16:17], v68, off offset:137 +; ALIGNED-NEXT: global_store_byte v[16:17], v113, off offset:135 +; ALIGNED-NEXT: global_store_byte v[16:17], v53, off offset:133 +; ALIGNED-NEXT: global_store_byte v[16:17], v87, off offset:131 +; ALIGNED-NEXT: global_store_byte v[16:17], v37, off offset:129 +; ALIGNED-NEXT: global_store_byte v[16:17], v24, off offset:127 +; ALIGNED-NEXT: global_store_byte v[16:17], v69, off offset:125 +; ALIGNED-NEXT: global_store_byte v[16:17], v86, off offset:123 +; ALIGNED-NEXT: global_store_byte v[16:17], v54, off offset:121 +; ALIGNED-NEXT: global_store_byte v[16:17], v114, off offset:119 +; ALIGNED-NEXT: global_store_byte v[16:17], v38, off offset:117 +; ALIGNED-NEXT: global_store_byte v[16:17], v85, off offset:115 +; ALIGNED-NEXT: global_store_byte v[16:17], v32, 
off offset:113 +; ALIGNED-NEXT: global_store_byte v[16:17], v51, off offset:111 +; ALIGNED-NEXT: global_store_byte v[16:17], v55, off offset:109 +; ALIGNED-NEXT: global_store_byte v[16:17], v80, off offset:107 +; ALIGNED-NEXT: global_store_byte v[16:17], v39, off offset:105 +; ALIGNED-NEXT: global_store_byte v[16:17], v115, off offset:103 +; ALIGNED-NEXT: global_store_byte v[16:17], v33, off offset:101 +; ALIGNED-NEXT: global_store_byte v[16:17], v101, off offset:99 +; ALIGNED-NEXT: global_store_byte v[16:17], v26, off offset:97 +; ALIGNED-NEXT: global_store_byte v[16:17], v50, off offset:95 +; ALIGNED-NEXT: global_store_byte v[16:17], v48, off offset:93 +; ALIGNED-NEXT: global_store_byte v[16:17], v99, off offset:91 +; ALIGNED-NEXT: global_store_byte v[16:17], v34, off offset:89 +; ALIGNED-NEXT: global_store_byte v[16:17], v102, off offset:87 +; ALIGNED-NEXT: global_store_byte v[16:17], v27, off offset:85 +; ALIGNED-NEXT: global_store_byte v[16:17], v96, off offset:83 +; ALIGNED-NEXT: global_store_byte v[16:17], v21, off offset:81 +; ALIGNED-NEXT: global_store_byte v[16:17], v49, off offset:79 +; ALIGNED-NEXT: global_store_byte v[16:17], v35, off offset:77 +; ALIGNED-NEXT: global_store_byte v[16:17], v81, off offset:75 +; ALIGNED-NEXT: global_store_byte v[16:17], v28, off offset:73 +; ALIGNED-NEXT: global_store_byte v[16:17], v31, off offset:71 +; ALIGNED-NEXT: global_store_byte v[16:17], v22, off offset:69 +; ALIGNED-NEXT: global_store_byte v[16:17], v100, off offset:67 +; ALIGNED-NEXT: global_store_byte v[16:17], v19, off offset:65 +; ALIGNED-NEXT: global_store_byte v[16:17], v36, off offset:63 +; ALIGNED-NEXT: global_store_byte v[16:17], v29, off offset:61 +; ALIGNED-NEXT: global_store_byte v[16:17], v97, off offset:59 +; ALIGNED-NEXT: global_store_byte v[16:17], v23, off offset:57 +; ALIGNED-NEXT: global_store_byte v[16:17], v103, off offset:55 +; ALIGNED-NEXT: global_store_byte v[16:17], v20, off offset:53 +; ALIGNED-NEXT: global_store_byte v[16:17], v82, off 
offset:51 +; ALIGNED-NEXT: global_store_byte v[16:17], v18, off offset:49 +; ALIGNED-NEXT: global_store_byte v[16:17], v71, off offset:43 +; ALIGNED-NEXT: global_store_byte v[16:17], v15, off offset:41 +; ALIGNED-NEXT: global_store_byte v[16:17], v66, off offset:47 +; ALIGNED-NEXT: global_store_byte v[16:17], v14, off offset:45 +; ALIGNED-NEXT: global_store_byte v[16:17], v30, off offset:35 +; ALIGNED-NEXT: global_store_byte v[16:17], v13, off offset:33 +; ALIGNED-NEXT: global_store_byte v[16:17], v98, off offset:39 +; ALIGNED-NEXT: global_store_byte v[16:17], v12, off offset:37 +; ALIGNED-NEXT: global_store_byte v[16:17], v70, off offset:31 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:29 +; ALIGNED-NEXT: global_store_byte v[16:17], v83, off offset:27 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:25 +; ALIGNED-NEXT: global_store_byte v[16:17], v112, off offset:23 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:21 +; ALIGNED-NEXT: global_store_byte v[16:17], v67, off offset:19 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v7, off offset:14 +; ALIGNED-NEXT: global_store_byte v[16:17], v7, off offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v6, off offset:10 +; ALIGNED-NEXT: global_store_byte v[16:17], v6, off offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v5, off offset:6 +; ALIGNED-NEXT: global_store_byte v[16:17], v5, off offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: global_store_byte_d16_hi v[16:17], v4, off offset:2 +; ALIGNED-NEXT: global_store_byte v[16:17], v4, off +; ALIGNED-NEXT: v_lshrrev_b32_e32 v8, 24, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v7, 8, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v9, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v6, 8, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v10, 24, v5 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v5, 8, v5 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v11, 24, v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v4, 8, v4 +; ALIGNED-NEXT: global_store_byte v[16:17], v8, off offset:15 +; ALIGNED-NEXT: global_store_byte v[16:17], v7, off offset:13 +; ALIGNED-NEXT: global_store_byte v[16:17], v9, off offset:11 +; ALIGNED-NEXT: global_store_byte v[16:17], v6, off offset:9 +; ALIGNED-NEXT: global_store_byte v[16:17], v10, off offset:7 +; ALIGNED-NEXT: global_store_byte v[16:17], v5, off offset:5 +; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3 +; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1 +; ALIGNED-NEXT: s_cbranch_scc0 .LBB6_5 +; ALIGNED-NEXT: .LBB6_6: ; %Flow10 +; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 +; ALIGNED-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memmove_p1_p1_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: s_mov_b32 s4, exec_lo +; UNROLL3-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; UNROLL3-NEXT: s_xor_b32 s6, exec_lo, s4 +; UNROLL3-NEXT: s_cbranch_execz .LBB6_4 +; UNROLL3-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB6_2: ; %memmove_fwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 
v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[12:13], off +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:16 +; UNROLL3-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:32 +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[4:7], off +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[8:11], off offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:32 +; UNROLL3-NEXT: s_cmp_lg_u64 s[4:5], 0x7e0 +; UNROLL3-NEXT: s_cbranch_scc1 .LBB6_2 +; UNROLL3-NEXT: ; %bb.3: ; %memmove_fwd_residual +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2016 +; UNROLL3-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:2032 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:2032 +; UNROLL3-NEXT: ; implicit-def: $vgpr2_vgpr3 +; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1 +; UNROLL3-NEXT: .LBB6_4: ; %Flow7 +; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6 +; UNROLL3-NEXT: s_cbranch_execz .LBB6_7 +; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:2032 +; UNROLL3-NEXT: s_movk_i32 s6, 0xffd0 +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0x7b0 +; UNROLL3-NEXT: s_mov_b32 s7, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2032 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:2016 +; UNROLL3-NEXT: .p2align 6 
+; UNROLL3-NEXT: .LBB6_6: ; %memmove_bwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[12:13], off +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:16 +; UNROLL3-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:32 +; UNROLL3-NEXT: s_add_u32 s4, s4, 0xffffffd0 +; UNROLL3-NEXT: s_addc_u32 s5, s5, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[4:7], off +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[8:11], off offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: global_store_dwordx4 v[16:17], v[12:15], off offset:32 +; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; UNROLL3-NEXT: s_cbranch_scc0 .LBB6_6 +; UNROLL3-NEXT: .LBB6_7: ; %Flow8 +; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + +define void @memmove_p0_p4_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { +; CHECK-LABEL: memmove_p0_p4_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s4, exec_lo +; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; CHECK-NEXT: s_xor_b32 s6, exec_lo, s4 +; CHECK-NEXT: s_cbranch_execz .LBB7_3 +; CHECK-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: .LBB7_2: ; %memmove_fwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 
v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_clause 0xf +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[96:97], off offset:240 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[96:97], off offset:224 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v[96:97], off offset:208 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v[96:97], off offset:192 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v[96:97], off offset:176 +; CHECK-NEXT: global_load_dwordx4 v[24:27], v[96:97], off offset:160 +; CHECK-NEXT: global_load_dwordx4 v[28:31], v[96:97], off offset:144 +; CHECK-NEXT: global_load_dwordx4 v[32:35], v[96:97], off offset:128 +; CHECK-NEXT: global_load_dwordx4 v[36:39], v[96:97], off offset:112 +; CHECK-NEXT: global_load_dwordx4 v[48:51], v[96:97], off offset:96 +; CHECK-NEXT: global_load_dwordx4 v[52:55], v[96:97], off offset:80 +; CHECK-NEXT: global_load_dwordx4 v[64:67], v[96:97], off offset:64 +; CHECK-NEXT: global_load_dwordx4 v[68:71], v[96:97], off offset:48 +; CHECK-NEXT: global_load_dwordx4 v[80:83], v[96:97], off offset:32 +; CHECK-NEXT: global_load_dwordx4 v[84:87], v[96:97], off offset:16 +; CHECK-NEXT: global_load_dwordx4 v[96:99], v[96:97], off +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[4:7] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[8:11] offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[12:15] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[16:19] offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[20:23] offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[24:27] offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: 
flat_store_dwordx4 v[100:101], v[28:31] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[32:35] offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[36:39] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[64:67] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] +; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800 +; CHECK-NEXT: s_cbranch_scc1 .LBB7_2 +; CHECK-NEXT: .LBB7_3: ; %Flow6 +; CHECK-NEXT: s_andn2_saveexec_b32 s8, s6 +; CHECK-NEXT: s_cbranch_execz .LBB7_6 +; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader +; CHECK-NEXT: s_movk_i32 s6, 0xff00 +; CHECK-NEXT: s_mov_b64 s[4:5], 0x700 +; CHECK-NEXT: s_mov_b32 s7, -1 +; CHECK-NEXT: .LBB7_5: ; %memmove_bwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: v_add_co_u32 v96, vcc_lo, v2, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_clause 0xf +; CHECK-NEXT: global_load_dwordx4 v[4:7], v[96:97], off offset:240 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v[96:97], off offset:224 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v[96:97], off offset:208 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v[96:97], off offset:192 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v[96:97], off offset:176 +; CHECK-NEXT: 
global_load_dwordx4 v[24:27], v[96:97], off offset:160 +; CHECK-NEXT: global_load_dwordx4 v[28:31], v[96:97], off offset:144 +; CHECK-NEXT: global_load_dwordx4 v[32:35], v[96:97], off offset:128 +; CHECK-NEXT: global_load_dwordx4 v[36:39], v[96:97], off offset:112 +; CHECK-NEXT: global_load_dwordx4 v[48:51], v[96:97], off offset:96 +; CHECK-NEXT: global_load_dwordx4 v[52:55], v[96:97], off offset:80 +; CHECK-NEXT: global_load_dwordx4 v[64:67], v[96:97], off offset:64 +; CHECK-NEXT: global_load_dwordx4 v[68:71], v[96:97], off offset:48 +; CHECK-NEXT: global_load_dwordx4 v[80:83], v[96:97], off offset:32 +; CHECK-NEXT: global_load_dwordx4 v[84:87], v[96:97], off offset:16 +; CHECK-NEXT: global_load_dwordx4 v[96:99], v[96:97], off +; CHECK-NEXT: s_add_u32 s4, s4, 0xffffff00 +; CHECK-NEXT: s_addc_u32 s5, s5, -1 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[4:7] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[8:11] offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[12:15] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[16:19] offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[20:23] offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[24:27] offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[28:31] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[32:35] offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[36:39] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], 
v[64:67] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] +; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; CHECK-NEXT: s_cbranch_scc0 .LBB7_5 +; CHECK-NEXT: .LBB7_6: ; %Flow7 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memmove_p0_p4_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_mov_b32 s4, exec_lo +; ALIGNED-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; ALIGNED-NEXT: s_xor_b32 s6, exec_lo, s4 +; ALIGNED-NEXT: s_cbranch_execz .LBB7_3 +; ALIGNED-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: .LBB7_2: ; %memmove_fwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v4, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: v_add_co_u32 v96, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: global_load_dwordx4 v[112:115], v[4:5], off offset:240 +; ALIGNED-NEXT: global_load_dwordx4 v[84:87], v[4:5], off offset:224 +; ALIGNED-NEXT: global_load_dwordx4 v[80:83], v[4:5], off offset:208 +; ALIGNED-NEXT: global_load_dwordx4 v[68:71], v[4:5], off offset:192 +; ALIGNED-NEXT: global_load_dwordx4 v[64:67], v[4:5], off offset:176 +; ALIGNED-NEXT: global_load_dwordx4 v[52:55], v[4:5], off offset:160 +; ALIGNED-NEXT: global_load_dwordx4 v[48:51], v[4:5], off offset:144 +; ALIGNED-NEXT: global_load_dwordx4 v[36:39], v[4:5], off offset:128 +; ALIGNED-NEXT: global_load_dwordx4 v[32:35], 
v[4:5], off offset:112 +; ALIGNED-NEXT: global_load_dwordx4 v[28:31], v[4:5], off offset:96 +; ALIGNED-NEXT: global_load_dwordx4 v[24:27], v[4:5], off offset:80 +; ALIGNED-NEXT: global_load_dwordx4 v[20:23], v[4:5], off offset:64 +; ALIGNED-NEXT: global_load_dwordx4 v[16:19], v[4:5], off offset:48 +; ALIGNED-NEXT: global_load_dwordx4 v[12:15], v[4:5], off offset:32 +; ALIGNED-NEXT: global_load_dwordx4 v[8:11], v[4:5], off offset:16 +; ALIGNED-NEXT: global_load_dwordx4 v[4:7], v[4:5], off +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v114, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_store_dword v115, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_store_dword v113, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_store_dword v112, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v114 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v114 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v114 offset:250 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v115 offset:254 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v115 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:252 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v115 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:248 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v113 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v113 offset:246 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 8, v113 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:244 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v112 offset:242 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:240 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v112 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v112 +; ALIGNED-NEXT: s_waitcnt vmcnt(14) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v86 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:251 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v98, 24, v87 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v87 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:253 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v84 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 8, v84 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:243 +; ALIGNED-NEXT: s_waitcnt vmcnt(13) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v82 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v82 +; ALIGNED-NEXT: buffer_store_dword v86, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_store_dword v87, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: buffer_store_dword v85, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v86 offset:234 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v87 offset:238 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:236 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:232 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v85 offset:230 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:228 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v84 offset:226 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:224 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v81 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:235 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v80 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v80 +; 
ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:239 +; ALIGNED-NEXT: s_waitcnt vmcnt(12) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:237 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v71 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:229 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v71 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v69 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 8, v69 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_store_dword v80, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v82 offset:218 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v83 offset:222 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:220 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:216 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v81 offset:214 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:212 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v80 offset:210 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:208 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v68 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v68 +; ALIGNED-NEXT: s_waitcnt vmcnt(11) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v66 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v67 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:217 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v67 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v65 +; 
ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v65 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v64 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:213 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v64 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:211 +; ALIGNED-NEXT: s_waitcnt vmcnt(10) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v54 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v54 +; ALIGNED-NEXT: buffer_store_dword v70, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_store_dword v71, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v70 offset:202 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v71 offset:206 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:204 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:200 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v69 offset:198 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:196 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v68 offset:194 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:192 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v52 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:203 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v53 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 24, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v55 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:201 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v53 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:207 +; ALIGNED-NEXT: s_waitcnt vmcnt(9) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v50 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v50 +; 
ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v51 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 8, v51 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v49 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v49 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: buffer_store_dword v65, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_store_dword v64, off, s[0:3], s32 offset:224 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v66 offset:186 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v67 offset:190 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:188 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:184 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v65 offset:182 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:180 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v64 offset:178 +; ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:176 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v48 +; ALIGNED-NEXT: s_waitcnt vmcnt(8) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v38 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:187 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v39 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v39 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:191 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v37 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:189 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v37 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 
offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:179 +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v34 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:177 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v34 +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v54 offset:170 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:168 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v55 offset:174 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:172 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v52 offset:162 +; ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:160 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v53 offset:166 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:164 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 8, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 8, v33 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v32 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v32 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:163 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v31 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:161 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v31 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:167 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v29 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 24, v30 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:173 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v30 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v29 +; ALIGNED-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v50 offset:154 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v51 offset:158 +; ALIGNED-NEXT: flat_store_byte v[96:97], v51 offset:156 +; ALIGNED-NEXT: flat_store_byte v[96:97], v50 offset:152 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v49 offset:150 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:148 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v48 offset:146 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:144 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v28 +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v26 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v26 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:155 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:153 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:159 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:157 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:151 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:149 +; ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:147 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v22 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:145 +; ALIGNED-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: 
flat_store_byte_d16_hi v[96:97], v38 offset:138 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v39 offset:142 +; ALIGNED-NEXT: flat_store_byte v[96:97], v39 offset:140 +; ALIGNED-NEXT: flat_store_byte v[96:97], v38 offset:136 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v37 offset:134 +; ALIGNED-NEXT: flat_store_byte v[96:97], v37 offset:132 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v36 offset:130 +; ALIGNED-NEXT: flat_store_byte v[96:97], v36 offset:128 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:139 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:137 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:143 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v18 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:141 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:135 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:133 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:131 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:129 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v34 offset:122 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v35 offset:126 +; ALIGNED-NEXT: flat_store_byte v[96:97], v35 offset:124 +; ALIGNED-NEXT: flat_store_byte v[96:97], v34 offset:120 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v33 offset:118 +; ALIGNED-NEXT: flat_store_byte v[96:97], v33 offset:116 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v32 offset:114 +; ALIGNED-NEXT: flat_store_byte v[96:97], v32 offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 24, v14 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:123 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:121 +; 
ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:127 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:125 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:119 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:117 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:115 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:113 +; ALIGNED-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v30 offset:106 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v31 offset:110 +; ALIGNED-NEXT: flat_store_byte v[96:97], v31 offset:108 +; ALIGNED-NEXT: flat_store_byte v[96:97], v30 offset:104 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v29 offset:102 +; ALIGNED-NEXT: flat_store_byte v[96:97], v29 offset:100 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v28 offset:98 +; ALIGNED-NEXT: flat_store_byte v[96:97], v28 offset:96 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:111 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 24, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v14 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:109 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v6 +; ALIGNED-NEXT: s_cmp_lg_u64 s[4:5], 0x800 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 8, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 24, v11 +; ALIGNED-NEXT: flat_store_byte 
v[96:97], v98 offset:103 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 24, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 24, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v17 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v17 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 24, v16 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v16 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 8, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 8, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v9 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:107 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 24, v8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:105 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 8, v7 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 24, v5 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v5 +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_store_dword v24, off, s[0:3], s32 +; ALIGNED-NEXT: flat_store_byte v[96:97], v50 offset:91 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v4 
+; ALIGNED-NEXT: flat_store_byte v[96:97], v51 offset:89 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v4 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v26 offset:90 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v27 offset:94 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:95 +; ALIGNED-NEXT: flat_store_byte v[96:97], v27 offset:92 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:93 +; ALIGNED-NEXT: flat_store_byte v[96:97], v26 offset:88 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:87 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v25 offset:86 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v25 offset:84 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:83 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v24 offset:82 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:81 +; ALIGNED-NEXT: flat_store_byte v[96:97], v24 offset:80 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v22 offset:74 +; ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:75 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:73 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v23 offset:78 +; ALIGNED-NEXT: flat_store_byte v[96:97], v36 offset:79 +; ALIGNED-NEXT: flat_store_byte v[96:97], v23 offset:76 +; ALIGNED-NEXT: flat_store_byte v[96:97], v37 offset:77 +; ALIGNED-NEXT: flat_store_byte v[96:97], v22 offset:72 +; ALIGNED-NEXT: flat_store_byte v[96:97], v38 offset:71 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v21 offset:70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v39 offset:69 +; ALIGNED-NEXT: flat_store_byte v[96:97], v21 offset:68 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:67 +; ALIGNED-NEXT: 
flat_store_byte_d16_hi v[96:97], v20 offset:66 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:65 +; ALIGNED-NEXT: flat_store_byte v[96:97], v20 offset:64 +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v18 offset:58 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:59 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:57 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v19 offset:62 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:63 +; ALIGNED-NEXT: flat_store_byte v[96:97], v19 offset:60 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:61 +; ALIGNED-NEXT: flat_store_byte v[96:97], v18 offset:56 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:55 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v17 offset:54 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:53 +; ALIGNED-NEXT: flat_store_byte v[96:97], v17 offset:52 +; ALIGNED-NEXT: flat_store_byte v[96:97], v32 offset:51 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v16 offset:50 +; ALIGNED-NEXT: flat_store_byte v[96:97], v33 offset:49 +; ALIGNED-NEXT: flat_store_byte v[96:97], v16 offset:48 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v14 offset:42 +; ALIGNED-NEXT: flat_store_byte v[96:97], v34 offset:43 +; ALIGNED-NEXT: flat_store_byte v[96:97], v35 offset:41 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v15 offset:46 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:47 +; ALIGNED-NEXT: flat_store_byte v[96:97], v15 
offset:44 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:45 +; ALIGNED-NEXT: flat_store_byte v[96:97], v14 offset:40 +; ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:39 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v13 offset:38 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:37 +; ALIGNED-NEXT: flat_store_byte v[96:97], v13 offset:36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:35 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v12 offset:34 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:33 +; ALIGNED-NEXT: flat_store_byte v[96:97], v12 offset:32 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v10 offset:26 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:27 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:25 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v11 offset:30 +; ALIGNED-NEXT: flat_store_byte v[96:97], v28 offset:31 +; ALIGNED-NEXT: flat_store_byte v[96:97], v11 offset:28 +; ALIGNED-NEXT: flat_store_byte v[96:97], v29 offset:29 +; ALIGNED-NEXT: flat_store_byte v[96:97], v10 offset:24 +; ALIGNED-NEXT: flat_store_byte v[96:97], v30 offset:23 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v9 offset:22 +; ALIGNED-NEXT: flat_store_byte v[96:97], v31 offset:21 +; ALIGNED-NEXT: flat_store_byte v[96:97], v9 offset:20 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:19 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v8 offset:18 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:17 +; ALIGNED-NEXT: flat_store_byte v[96:97], v8 offset:16 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 
offset:84 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v6 offset:10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:11 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:9 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v7 offset:14 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:15 +; ALIGNED-NEXT: flat_store_byte v[96:97], v7 offset:12 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:13 +; ALIGNED-NEXT: flat_store_byte v[96:97], v6 offset:8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:7 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v5 offset:6 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:5 +; ALIGNED-NEXT: flat_store_byte v[96:97], v5 offset:4 +; ALIGNED-NEXT: flat_store_byte v[96:97], v50 offset:3 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v4 offset:2 +; ALIGNED-NEXT: flat_store_byte v[96:97], v51 offset:1 +; ALIGNED-NEXT: flat_store_byte v[96:97], v4 +; ALIGNED-NEXT: s_cbranch_scc1 .LBB7_2 +; ALIGNED-NEXT: .LBB7_3: ; %Flow6 +; ALIGNED-NEXT: s_andn2_saveexec_b32 s8, s6 +; ALIGNED-NEXT: s_cbranch_execz .LBB7_6 +; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader +; ALIGNED-NEXT: s_movk_i32 s6, 0xff00 +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0x700 +; ALIGNED-NEXT: s_mov_b32 s7, -1 +; ALIGNED-NEXT: .LBB7_5: ; %memmove_bwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: v_add_co_u32 v4, vcc_lo, v2, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: v_add_co_u32 v96, vcc_lo, v0, s4 +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v97, vcc_lo, s5, v1, vcc_lo +; ALIGNED-NEXT: s_clause 0xf +; ALIGNED-NEXT: global_load_dwordx4 v[98:101], v[4:5], off offset:240 +; ALIGNED-NEXT: global_load_dwordx4 v[84:87], v[4:5], off offset:224 +; ALIGNED-NEXT: global_load_dwordx4 v[80:83], v[4:5], off offset:208 +; ALIGNED-NEXT: global_load_dwordx4 v[68:71], v[4:5], off offset:192 +; ALIGNED-NEXT: global_load_dwordx4 
v[64:67], v[4:5], off offset:176 +; ALIGNED-NEXT: global_load_dwordx4 v[52:55], v[4:5], off offset:160 +; ALIGNED-NEXT: global_load_dwordx4 v[48:51], v[4:5], off offset:144 +; ALIGNED-NEXT: global_load_dwordx4 v[36:39], v[4:5], off offset:128 +; ALIGNED-NEXT: global_load_dwordx4 v[32:35], v[4:5], off offset:112 +; ALIGNED-NEXT: global_load_dwordx4 v[28:31], v[4:5], off offset:96 +; ALIGNED-NEXT: global_load_dwordx4 v[24:27], v[4:5], off offset:80 +; ALIGNED-NEXT: global_load_dwordx4 v[20:23], v[4:5], off offset:64 +; ALIGNED-NEXT: global_load_dwordx4 v[16:19], v[4:5], off offset:48 +; ALIGNED-NEXT: global_load_dwordx4 v[12:15], v[4:5], off offset:32 +; ALIGNED-NEXT: global_load_dwordx4 v[8:11], v[4:5], off offset:16 +; ALIGNED-NEXT: global_load_dwordx4 v[4:7], v[4:5], off +; ALIGNED-NEXT: s_add_u32 s4, s4, 0xffffff00 +; ALIGNED-NEXT: s_addc_u32 s5, s5, -1 +; ALIGNED-NEXT: s_waitcnt vmcnt(15) +; ALIGNED-NEXT: buffer_store_dword v100, off, s[0:3], s32 offset:424 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:428 +; ALIGNED-NEXT: buffer_store_dword v99, off, s[0:3], s32 offset:420 +; ALIGNED-NEXT: buffer_store_dword v98, off, s[0:3], s32 offset:416 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v100 offset:250 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v101 offset:254 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:252 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:248 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v99 offset:246 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:244 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v98 offset:242 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:240 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v100 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v99 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v99, 24, v98 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v98 +; ALIGNED-NEXT: s_waitcnt vmcnt(14) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v86 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v86 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:251 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v87 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:249 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v87 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:255 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:253 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:247 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v84 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:245 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v84 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:243 +; ALIGNED-NEXT: s_waitcnt vmcnt(13) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v82 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:241 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v82 +; ALIGNED-NEXT: buffer_store_dword v86, off, s[0:3], s32 offset:440 +; ALIGNED-NEXT: buffer_store_dword v87, off, s[0:3], s32 offset:444 +; ALIGNED-NEXT: buffer_store_dword v85, off, s[0:3], s32 offset:436 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:432 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v86 offset:234 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v87 offset:238 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:236 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:232 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v85 offset:230 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:228 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v84 offset:226 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:224 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v83 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v83 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v86, 24, v81 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v81 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:235 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v80 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:233 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v80 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:239 +; ALIGNED-NEXT: s_waitcnt vmcnt(12) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:237 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:231 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v71 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:229 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v71 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:227 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v69 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:225 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v69 +; ALIGNED-NEXT: buffer_store_dword v82, off, s[0:3], s32 offset:392 +; ALIGNED-NEXT: buffer_store_dword v83, off, s[0:3], s32 offset:396 +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:388 +; ALIGNED-NEXT: buffer_store_dword v80, off, s[0:3], s32 offset:384 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v82 offset:218 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v83 offset:222 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:220 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:216 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v81 offset:214 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:212 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v80 offset:210 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:208 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v68 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v68 +; ALIGNED-NEXT: s_waitcnt vmcnt(11) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v66 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v66 +; ALIGNED-NEXT: 
flat_store_byte v[96:97], v99 offset:219 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v67 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:217 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v67 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:223 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v65 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:221 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v65 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:215 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v64 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:213 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v64 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:211 +; ALIGNED-NEXT: s_waitcnt vmcnt(10) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v54 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:209 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v54 +; ALIGNED-NEXT: buffer_store_dword v70, off, s[0:3], s32 offset:408 +; ALIGNED-NEXT: buffer_store_dword v71, off, s[0:3], s32 offset:412 +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:404 +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:400 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v70 offset:202 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v71 offset:206 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:204 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:200 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v69 offset:198 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:196 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v68 offset:194 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:192 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 8, v55 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 24, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 8, v52 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 24, v55 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:203 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 24, v53 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:201 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v53 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:207 +; ALIGNED-NEXT: s_waitcnt vmcnt(9) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v50 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:205 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v50 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:199 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v51 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:197 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v51 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:195 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v49 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:193 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v49 +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:488 +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:492 +; ALIGNED-NEXT: buffer_store_dword v65, off, s[0:3], s32 offset:484 +; ALIGNED-NEXT: buffer_store_dword v64, off, s[0:3], s32 offset:480 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v66 offset:186 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v67 offset:190 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:188 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:184 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v65 offset:182 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:180 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v64 offset:178 +; ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:176 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v48 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v48 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:187 +; ALIGNED-NEXT: s_waitcnt vmcnt(8) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v39 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v38 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v38 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:185 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v39 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:191 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v37 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:189 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v37 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:183 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:181 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:179 +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v34 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:177 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v34 +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:508 +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:504 +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:500 +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:496 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v54 offset:170 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:168 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v55 offset:174 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:172 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v52 offset:162 +; ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:160 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v53 offset:166 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:164 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 24, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v35 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 24, v33 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v116, 8, v33 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:171 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v32 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:169 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v32 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:173 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 24, v31 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:163 +; ALIGNED-NEXT: 
v_lshrrev_b32_e32 v70, 8, v31 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:161 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v29 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:175 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 24, v30 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v30 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:167 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:165 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v29 +; ALIGNED-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:456 +; ALIGNED-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:460 +; ALIGNED-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:452 +; ALIGNED-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:448 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v50 offset:154 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v51 offset:158 +; ALIGNED-NEXT: flat_store_byte v[96:97], v51 offset:156 +; ALIGNED-NEXT: flat_store_byte v[96:97], v50 offset:152 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v49 offset:150 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:148 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v48 offset:146 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:144 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 24, v28 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v28 +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v50, 24, v26 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:155 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:153 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:159 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:157 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:151 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:149 +; ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:147 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:145 +; ALIGNED-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:472 +; ALIGNED-NEXT: buffer_store_dword v39, off, s[0:3], s32 
offset:476 +; ALIGNED-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:468 +; ALIGNED-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:464 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v38 offset:138 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v39 offset:142 +; ALIGNED-NEXT: flat_store_byte v[96:97], v39 offset:140 +; ALIGNED-NEXT: flat_store_byte v[96:97], v38 offset:136 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v37 offset:134 +; ALIGNED-NEXT: flat_store_byte v[96:97], v37 offset:132 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v36 offset:130 +; ALIGNED-NEXT: flat_store_byte v[96:97], v36 offset:128 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:143 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v82, 24, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v51, 8, v26 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:139 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:137 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:141 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:135 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:133 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:131 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:129 +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:296 +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:300 +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:292 +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:288 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v34 offset:122 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v35 offset:126 +; ALIGNED-NEXT: flat_store_byte v[96:97], v35 offset:124 +; ALIGNED-NEXT: flat_store_byte v[96:97], v34 offset:120 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v33 offset:118 +; ALIGNED-NEXT: flat_store_byte v[96:97], v33 offset:116 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v32 offset:114 +; ALIGNED-NEXT: flat_store_byte v[96:97], v32 
offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v34, 24, v14 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:123 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:121 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:127 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:125 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:119 +; ALIGNED-NEXT: flat_store_byte v[96:97], v116 offset:117 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:115 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v114, 24, v10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:113 +; ALIGNED-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:312 +; ALIGNED-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:316 +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:308 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:304 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v30 offset:106 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v31 offset:110 +; ALIGNED-NEXT: flat_store_byte v[96:97], v31 offset:108 +; ALIGNED-NEXT: flat_store_byte v[96:97], v30 offset:104 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v29 offset:102 +; ALIGNED-NEXT: flat_store_byte v[96:97], v29 offset:100 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v28 offset:98 +; ALIGNED-NEXT: flat_store_byte v[96:97], v28 offset:96 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:111 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshrrev_b32_e32 v69, 24, v6 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v103, 24, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v83, 8, v18 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v35, 8, v14 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v115, 8, v10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:109 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v70, 8, v6 +; ALIGNED-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; ALIGNED-NEXT: v_lshrrev_b32_e32 v102, 8, v27 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v101, 24, v25 +; 
ALIGNED-NEXT: v_lshrrev_b32_e32 v99, 24, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v86, 24, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v28, 24, v11 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:103 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v71, 24, v7 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v100, 8, v25 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v80, 24, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v81, 8, v24 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v64, 24, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v65, 8, v22 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v36, 24, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v37, 8, v23 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v38, 24, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v39, 8, v21 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v66, 24, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v67, 8, v20 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v98, 8, v19 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v84, 24, v17 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v85, 8, v17 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v32, 24, v16 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v33, 8, v16 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v87, 8, v15 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v53, 24, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v54, 8, v13 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v55, 24, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v113, 8, v12 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v29, 8, v11 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v30, 24, v9 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v31, 8, v9 +; ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:107 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v52, 24, v8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:105 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v68, 8, v8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:101 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v112, 8, v7 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:99 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v48, 24, v5 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:97 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v49, 8, v5 +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:264 +; ALIGNED-NEXT: 
buffer_store_dword v27, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v26 offset:90 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v27 offset:94 +; ALIGNED-NEXT: flat_store_byte v[96:97], v27 offset:92 +; ALIGNED-NEXT: flat_store_byte v[96:97], v26 offset:88 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v25 offset:86 +; ALIGNED-NEXT: flat_store_byte v[96:97], v25 offset:84 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v24 offset:82 +; ALIGNED-NEXT: flat_store_byte v[96:97], v24 offset:80 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v24, 24, v4 +; ALIGNED-NEXT: v_lshrrev_b32_e32 v25, 8, v4 +; ALIGNED-NEXT: flat_store_byte v[96:97], v50 offset:91 +; ALIGNED-NEXT: flat_store_byte v[96:97], v51 offset:89 +; ALIGNED-NEXT: flat_store_byte v[96:97], v103 offset:95 +; ALIGNED-NEXT: flat_store_byte v[96:97], v102 offset:93 +; ALIGNED-NEXT: flat_store_byte v[96:97], v101 offset:87 +; ALIGNED-NEXT: flat_store_byte v[96:97], v100 offset:85 +; ALIGNED-NEXT: flat_store_byte v[96:97], v80 offset:83 +; ALIGNED-NEXT: flat_store_byte v[96:97], v81 offset:81 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:284 +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v22 offset:74 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v23 offset:78 +; ALIGNED-NEXT: flat_store_byte v[96:97], v23 offset:76 +; ALIGNED-NEXT: flat_store_byte v[96:97], v22 offset:72 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v21 offset:70 +; ALIGNED-NEXT: flat_store_byte v[96:97], v21 offset:68 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v20 offset:66 +; ALIGNED-NEXT: flat_store_byte v[96:97], v20 offset:64 +; 
ALIGNED-NEXT: flat_store_byte v[96:97], v64 offset:75 +; ALIGNED-NEXT: flat_store_byte v[96:97], v65 offset:73 +; ALIGNED-NEXT: flat_store_byte v[96:97], v36 offset:79 +; ALIGNED-NEXT: flat_store_byte v[96:97], v37 offset:77 +; ALIGNED-NEXT: flat_store_byte v[96:97], v38 offset:71 +; ALIGNED-NEXT: flat_store_byte v[96:97], v39 offset:69 +; ALIGNED-NEXT: flat_store_byte v[96:97], v66 offset:67 +; ALIGNED-NEXT: flat_store_byte v[96:97], v67 offset:65 +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:360 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:364 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:356 +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:352 +; ALIGNED-NEXT: flat_store_byte v[96:97], v82 offset:59 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v18 offset:58 +; ALIGNED-NEXT: flat_store_byte v[96:97], v83 offset:57 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v19 offset:62 +; ALIGNED-NEXT: flat_store_byte v[96:97], v99 offset:63 +; ALIGNED-NEXT: flat_store_byte v[96:97], v19 offset:60 +; ALIGNED-NEXT: flat_store_byte v[96:97], v98 offset:61 +; ALIGNED-NEXT: flat_store_byte v[96:97], v18 offset:56 +; ALIGNED-NEXT: flat_store_byte v[96:97], v84 offset:55 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v17 offset:54 +; ALIGNED-NEXT: flat_store_byte v[96:97], v85 offset:53 +; ALIGNED-NEXT: flat_store_byte v[96:97], v17 offset:52 +; ALIGNED-NEXT: flat_store_byte v[96:97], v32 offset:51 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v16 offset:50 +; ALIGNED-NEXT: flat_store_byte v[96:97], v33 offset:49 +; ALIGNED-NEXT: flat_store_byte v[96:97], v16 offset:48 +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:376 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:380 +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:372 +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:368 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v14 
offset:42 +; ALIGNED-NEXT: flat_store_byte v[96:97], v34 offset:43 +; ALIGNED-NEXT: flat_store_byte v[96:97], v35 offset:41 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v15 offset:46 +; ALIGNED-NEXT: flat_store_byte v[96:97], v86 offset:47 +; ALIGNED-NEXT: flat_store_byte v[96:97], v15 offset:44 +; ALIGNED-NEXT: flat_store_byte v[96:97], v87 offset:45 +; ALIGNED-NEXT: flat_store_byte v[96:97], v14 offset:40 +; ALIGNED-NEXT: flat_store_byte v[96:97], v53 offset:39 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v13 offset:38 +; ALIGNED-NEXT: flat_store_byte v[96:97], v54 offset:37 +; ALIGNED-NEXT: flat_store_byte v[96:97], v13 offset:36 +; ALIGNED-NEXT: flat_store_byte v[96:97], v55 offset:35 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v12 offset:34 +; ALIGNED-NEXT: flat_store_byte v[96:97], v113 offset:33 +; ALIGNED-NEXT: flat_store_byte v[96:97], v12 offset:32 +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:328 +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:332 +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:324 +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:320 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v10 offset:26 +; ALIGNED-NEXT: flat_store_byte v[96:97], v114 offset:27 +; ALIGNED-NEXT: flat_store_byte v[96:97], v115 offset:25 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v11 offset:30 +; ALIGNED-NEXT: flat_store_byte v[96:97], v28 offset:31 +; ALIGNED-NEXT: flat_store_byte v[96:97], v11 offset:28 +; ALIGNED-NEXT: flat_store_byte v[96:97], v29 offset:29 +; ALIGNED-NEXT: flat_store_byte v[96:97], v10 offset:24 +; ALIGNED-NEXT: flat_store_byte v[96:97], v30 offset:23 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v9 offset:22 +; ALIGNED-NEXT: flat_store_byte v[96:97], v31 offset:21 +; ALIGNED-NEXT: flat_store_byte v[96:97], v9 offset:20 +; ALIGNED-NEXT: flat_store_byte v[96:97], v52 offset:19 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v8 offset:18 +; 
ALIGNED-NEXT: flat_store_byte v[96:97], v68 offset:17 +; ALIGNED-NEXT: flat_store_byte v[96:97], v8 offset:16 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:344 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:348 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:340 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:336 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v6 offset:10 +; ALIGNED-NEXT: flat_store_byte v[96:97], v69 offset:11 +; ALIGNED-NEXT: flat_store_byte v[96:97], v70 offset:9 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v7 offset:14 +; ALIGNED-NEXT: flat_store_byte v[96:97], v71 offset:15 +; ALIGNED-NEXT: flat_store_byte v[96:97], v7 offset:12 +; ALIGNED-NEXT: flat_store_byte v[96:97], v112 offset:13 +; ALIGNED-NEXT: flat_store_byte v[96:97], v6 offset:8 +; ALIGNED-NEXT: flat_store_byte v[96:97], v48 offset:7 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v5 offset:6 +; ALIGNED-NEXT: flat_store_byte v[96:97], v49 offset:5 +; ALIGNED-NEXT: flat_store_byte v[96:97], v5 offset:4 +; ALIGNED-NEXT: flat_store_byte v[96:97], v24 offset:3 +; ALIGNED-NEXT: flat_store_byte_d16_hi v[96:97], v4 offset:2 +; ALIGNED-NEXT: flat_store_byte v[96:97], v25 offset:1 +; ALIGNED-NEXT: flat_store_byte v[96:97], v4 +; ALIGNED-NEXT: s_cbranch_scc0 .LBB7_5 +; ALIGNED-NEXT: .LBB7_6: ; %Flow7 +; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; ALIGNED-NEXT: s_waitcnt lgkmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memmove_p0_p4_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: s_mov_b32 s4, exec_lo +; UNROLL3-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] +; UNROLL3-NEXT: s_xor_b32 s6, exec_lo, s4 +; UNROLL3-NEXT: s_cbranch_execz .LBB7_4 +; UNROLL3-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB7_2: ; %memmove_fwd_loop +; UNROLL3-NEXT: ; =>This Inner 
Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[12:13], off +; UNROLL3-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:32 +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[4:7] offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[8:11] +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[12:15] offset:32 +; UNROLL3-NEXT: s_cmp_lg_u64 s[4:5], 0x7e0 +; UNROLL3-NEXT: s_cbranch_scc1 .LBB7_2 +; UNROLL3-NEXT: ; %bb.3: ; %memmove_fwd_residual +; UNROLL3-NEXT: s_clause 0x1 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:2016 +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:2032 +; UNROLL3-NEXT: ; implicit-def: $vgpr2_vgpr3 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[8:11] offset:2032 +; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1 +; UNROLL3-NEXT: .LBB7_4: ; %Flow4 +; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6 +; UNROLL3-NEXT: s_cbranch_execz .LBB7_7 +; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual +; UNROLL3-NEXT: s_clause 0x1 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:2032 +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:2016 +; UNROLL3-NEXT: s_movk_i32 s6, 0xffd0 +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0x7b0 +; UNROLL3-NEXT: s_mov_b32 s7, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:2032 +; 
UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[8:11] offset:2016 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB7_6: ; %memmove_bwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: v_add_co_u32 v12, vcc_lo, v2, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_clause 0x2 +; UNROLL3-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 +; UNROLL3-NEXT: global_load_dwordx4 v[8:11], v[12:13], off +; UNROLL3-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:32 +; UNROLL3-NEXT: s_add_u32 s4, s4, 0xffffffd0 +; UNROLL3-NEXT: s_addc_u32 s5, s5, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[4:7] offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[8:11] +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[12:15] offset:32 +; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; UNROLL3-NEXT: s_cbranch_scc0 .LBB7_6 +; UNROLL3-NEXT: .LBB7_7: ; %Flow5 +; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; UNROLL3-NEXT: s_waitcnt lgkmcnt(0) +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + +define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { +; CHECK-LABEL: memmove_p5_p5_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s4, exec_lo +; CHECK-NEXT: v_cmpx_ge_u32_e64 v1, v0 +; CHECK-NEXT: s_xor_b32 s6, exec_lo, s4 +; CHECK-NEXT: s_cbranch_execz .LBB8_3 +; CHECK-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; CHECK-NEXT: s_mov_b64 s[4:5], 0x800 +; CHECK-NEXT: .LBB8_2: ; 
%memmove_fwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_clause 0x3e +; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:248 +; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:244 +; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:240 +; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:236 +; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:232 +; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:228 +; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:224 +; CHECK-NEXT: buffer_load_dword v10, v1, s[0:3], 0 offen offset:220 +; CHECK-NEXT: buffer_load_dword v11, v1, s[0:3], 0 offen offset:216 +; CHECK-NEXT: buffer_load_dword v12, v1, s[0:3], 0 offen offset:212 +; CHECK-NEXT: buffer_load_dword v13, v1, s[0:3], 0 offen offset:208 +; CHECK-NEXT: buffer_load_dword v14, v1, s[0:3], 0 offen offset:204 +; CHECK-NEXT: buffer_load_dword v15, v1, s[0:3], 0 offen offset:200 +; CHECK-NEXT: buffer_load_dword v16, v1, s[0:3], 0 offen offset:196 +; CHECK-NEXT: buffer_load_dword v17, v1, s[0:3], 0 offen offset:192 +; CHECK-NEXT: buffer_load_dword v18, v1, s[0:3], 0 offen offset:188 +; CHECK-NEXT: buffer_load_dword v19, v1, s[0:3], 0 offen offset:184 +; CHECK-NEXT: buffer_load_dword v20, v1, s[0:3], 0 offen offset:180 +; CHECK-NEXT: buffer_load_dword v21, v1, s[0:3], 0 offen offset:176 +; CHECK-NEXT: buffer_load_dword v22, v1, s[0:3], 0 offen offset:172 +; CHECK-NEXT: buffer_load_dword v23, v1, s[0:3], 0 offen offset:168 +; CHECK-NEXT: buffer_load_dword v24, v1, s[0:3], 0 offen offset:164 +; CHECK-NEXT: buffer_load_dword v25, v1, s[0:3], 0 offen offset:160 +; CHECK-NEXT: buffer_load_dword v26, v1, s[0:3], 0 offen offset:156 +; CHECK-NEXT: buffer_load_dword v27, v1, s[0:3], 0 offen offset:152 +; CHECK-NEXT: buffer_load_dword v28, v1, s[0:3], 0 offen offset:148 +; CHECK-NEXT: buffer_load_dword v29, v1, 
s[0:3], 0 offen offset:144 +; CHECK-NEXT: buffer_load_dword v30, v1, s[0:3], 0 offen offset:140 +; CHECK-NEXT: buffer_load_dword v31, v1, s[0:3], 0 offen offset:136 +; CHECK-NEXT: buffer_load_dword v32, v1, s[0:3], 0 offen offset:132 +; CHECK-NEXT: buffer_load_dword v33, v1, s[0:3], 0 offen offset:128 +; CHECK-NEXT: buffer_load_dword v34, v1, s[0:3], 0 offen offset:124 +; CHECK-NEXT: buffer_load_dword v35, v1, s[0:3], 0 offen offset:120 +; CHECK-NEXT: buffer_load_dword v36, v1, s[0:3], 0 offen offset:116 +; CHECK-NEXT: buffer_load_dword v37, v1, s[0:3], 0 offen offset:112 +; CHECK-NEXT: buffer_load_dword v38, v1, s[0:3], 0 offen offset:108 +; CHECK-NEXT: buffer_load_dword v39, v1, s[0:3], 0 offen offset:104 +; CHECK-NEXT: buffer_load_dword v48, v1, s[0:3], 0 offen offset:100 +; CHECK-NEXT: buffer_load_dword v49, v1, s[0:3], 0 offen offset:96 +; CHECK-NEXT: buffer_load_dword v50, v1, s[0:3], 0 offen offset:92 +; CHECK-NEXT: buffer_load_dword v51, v1, s[0:3], 0 offen offset:88 +; CHECK-NEXT: buffer_load_dword v52, v1, s[0:3], 0 offen offset:84 +; CHECK-NEXT: buffer_load_dword v53, v1, s[0:3], 0 offen offset:80 +; CHECK-NEXT: buffer_load_dword v54, v1, s[0:3], 0 offen offset:76 +; CHECK-NEXT: buffer_load_dword v55, v1, s[0:3], 0 offen offset:72 +; CHECK-NEXT: buffer_load_dword v64, v1, s[0:3], 0 offen offset:68 +; CHECK-NEXT: buffer_load_dword v65, v1, s[0:3], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v66, v1, s[0:3], 0 offen offset:60 +; CHECK-NEXT: buffer_load_dword v67, v1, s[0:3], 0 offen offset:56 +; CHECK-NEXT: buffer_load_dword v68, v1, s[0:3], 0 offen offset:52 +; CHECK-NEXT: buffer_load_dword v69, v1, s[0:3], 0 offen offset:48 +; CHECK-NEXT: buffer_load_dword v70, v1, s[0:3], 0 offen offset:44 +; CHECK-NEXT: buffer_load_dword v71, v1, s[0:3], 0 offen offset:40 +; CHECK-NEXT: buffer_load_dword v80, v1, s[0:3], 0 offen offset:36 +; CHECK-NEXT: buffer_load_dword v81, v1, s[0:3], 0 offen offset:32 +; CHECK-NEXT: buffer_load_dword v82, v1, s[0:3], 0 offen 
offset:28 +; CHECK-NEXT: buffer_load_dword v83, v1, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_load_dword v84, v1, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_load_dword v85, v1, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v86, v1, s[0:3], 0 offen offset:12 +; CHECK-NEXT: buffer_load_dword v87, v1, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v96, v1, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v97, v1, s[0:3], 0 offen +; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 +; CHECK-NEXT: s_add_u32 s4, s4, 0xffffff00 +; CHECK-NEXT: s_addc_u32 s5, s5, -1 +; CHECK-NEXT: s_waitcnt vmcnt(62) +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:248 +; CHECK-NEXT: s_waitcnt vmcnt(61) +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:244 +; CHECK-NEXT: s_waitcnt vmcnt(60) +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(59) +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:236 +; CHECK-NEXT: s_waitcnt vmcnt(58) +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:232 +; CHECK-NEXT: s_waitcnt vmcnt(57) +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:228 +; CHECK-NEXT: s_waitcnt vmcnt(56) +; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(55) +; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:220 +; CHECK-NEXT: s_waitcnt vmcnt(54) +; CHECK-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:216 +; CHECK-NEXT: s_waitcnt vmcnt(53) +; CHECK-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:212 +; CHECK-NEXT: s_waitcnt vmcnt(52) +; CHECK-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(51) +; CHECK-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:204 +; CHECK-NEXT: s_waitcnt vmcnt(50) +; CHECK-NEXT: buffer_store_dword v15, 
v0, s[0:3], 0 offen offset:200 +; CHECK-NEXT: s_waitcnt vmcnt(49) +; CHECK-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:196 +; CHECK-NEXT: s_waitcnt vmcnt(48) +; CHECK-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(47) +; CHECK-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:188 +; CHECK-NEXT: s_waitcnt vmcnt(46) +; CHECK-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:184 +; CHECK-NEXT: s_waitcnt vmcnt(45) +; CHECK-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:180 +; CHECK-NEXT: s_waitcnt vmcnt(44) +; CHECK-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(43) +; CHECK-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:172 +; CHECK-NEXT: s_waitcnt vmcnt(42) +; CHECK-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:168 +; CHECK-NEXT: s_waitcnt vmcnt(41) +; CHECK-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:164 +; CHECK-NEXT: s_waitcnt vmcnt(40) +; CHECK-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(39) +; CHECK-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:156 +; CHECK-NEXT: s_waitcnt vmcnt(38) +; CHECK-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:152 +; CHECK-NEXT: s_waitcnt vmcnt(37) +; CHECK-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:148 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:140 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:136 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:132 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_dword v34, v0, 
s[0:3], 0 offen offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen offset:116 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen offset:104 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen offset:100 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen offset:88 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen offset:84 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen offset:76 +; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: buffer_store_dword v64, v0, s[0:3], 0 offen offset:68 +; CHECK-NEXT: s_waitcnt vmcnt(16) +; CHECK-NEXT: buffer_store_dword v65, v0, s[0:3], 0 offen offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: buffer_store_dword v66, v0, s[0:3], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: buffer_store_dword v67, v0, s[0:3], 0 offen offset:56 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: buffer_store_dword v68, v0, s[0:3], 0 offen offset:52 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: buffer_store_dword v69, v0, s[0:3], 0 offen 
offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: buffer_store_dword v70, v0, s[0:3], 0 offen offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: buffer_store_dword v71, v0, s[0:3], 0 offen offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: buffer_store_dword v80, v0, s[0:3], 0 offen offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: buffer_store_dword v81, v0, s[0:3], 0 offen offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: buffer_store_dword v82, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: buffer_store_dword v83, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: buffer_store_dword v84, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: buffer_store_dword v85, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: buffer_store_dword v86, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: buffer_store_dword v87, v0, s[0:3], 0 offen offset:8 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: buffer_store_dword v96, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: buffer_store_dword v97, v0, s[0:3], 0 offen +; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x100, v0 +; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB8_2 +; CHECK-NEXT: .LBB8_3: ; %Flow18 +; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6 +; CHECK-NEXT: s_cbranch_execz .LBB8_6 +; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader +; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0 +; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 +; CHECK-NEXT: s_movk_i32 s4, 0xf800 +; CHECK-NEXT: s_mov_b32 s5, -1 +; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_clause 0x3e +; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:248 +; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 
offen offset:244 +; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:240 +; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:236 +; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:232 +; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:228 +; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:224 +; CHECK-NEXT: buffer_load_dword v10, v1, s[0:3], 0 offen offset:220 +; CHECK-NEXT: buffer_load_dword v11, v1, s[0:3], 0 offen offset:216 +; CHECK-NEXT: buffer_load_dword v12, v1, s[0:3], 0 offen offset:212 +; CHECK-NEXT: buffer_load_dword v13, v1, s[0:3], 0 offen offset:208 +; CHECK-NEXT: buffer_load_dword v14, v1, s[0:3], 0 offen offset:204 +; CHECK-NEXT: buffer_load_dword v15, v1, s[0:3], 0 offen offset:200 +; CHECK-NEXT: buffer_load_dword v16, v1, s[0:3], 0 offen offset:196 +; CHECK-NEXT: buffer_load_dword v17, v1, s[0:3], 0 offen offset:192 +; CHECK-NEXT: buffer_load_dword v18, v1, s[0:3], 0 offen offset:188 +; CHECK-NEXT: buffer_load_dword v19, v1, s[0:3], 0 offen offset:184 +; CHECK-NEXT: buffer_load_dword v20, v1, s[0:3], 0 offen offset:180 +; CHECK-NEXT: buffer_load_dword v21, v1, s[0:3], 0 offen offset:176 +; CHECK-NEXT: buffer_load_dword v22, v1, s[0:3], 0 offen offset:172 +; CHECK-NEXT: buffer_load_dword v23, v1, s[0:3], 0 offen offset:168 +; CHECK-NEXT: buffer_load_dword v24, v1, s[0:3], 0 offen offset:164 +; CHECK-NEXT: buffer_load_dword v25, v1, s[0:3], 0 offen offset:160 +; CHECK-NEXT: buffer_load_dword v26, v1, s[0:3], 0 offen offset:156 +; CHECK-NEXT: buffer_load_dword v27, v1, s[0:3], 0 offen offset:152 +; CHECK-NEXT: buffer_load_dword v28, v1, s[0:3], 0 offen offset:148 +; CHECK-NEXT: buffer_load_dword v29, v1, s[0:3], 0 offen offset:144 +; CHECK-NEXT: buffer_load_dword v30, v1, s[0:3], 0 offen offset:140 +; CHECK-NEXT: buffer_load_dword v31, v1, s[0:3], 0 offen offset:136 +; CHECK-NEXT: buffer_load_dword v32, v1, s[0:3], 0 offen offset:132 +; CHECK-NEXT: buffer_load_dword v33, v1, s[0:3], 0 
offen offset:128 +; CHECK-NEXT: buffer_load_dword v34, v1, s[0:3], 0 offen offset:124 +; CHECK-NEXT: buffer_load_dword v35, v1, s[0:3], 0 offen offset:120 +; CHECK-NEXT: buffer_load_dword v36, v1, s[0:3], 0 offen offset:116 +; CHECK-NEXT: buffer_load_dword v37, v1, s[0:3], 0 offen offset:112 +; CHECK-NEXT: buffer_load_dword v38, v1, s[0:3], 0 offen offset:108 +; CHECK-NEXT: buffer_load_dword v39, v1, s[0:3], 0 offen offset:104 +; CHECK-NEXT: buffer_load_dword v48, v1, s[0:3], 0 offen offset:100 +; CHECK-NEXT: buffer_load_dword v49, v1, s[0:3], 0 offen offset:96 +; CHECK-NEXT: buffer_load_dword v50, v1, s[0:3], 0 offen offset:92 +; CHECK-NEXT: buffer_load_dword v51, v1, s[0:3], 0 offen offset:88 +; CHECK-NEXT: buffer_load_dword v52, v1, s[0:3], 0 offen offset:84 +; CHECK-NEXT: buffer_load_dword v53, v1, s[0:3], 0 offen offset:80 +; CHECK-NEXT: buffer_load_dword v54, v1, s[0:3], 0 offen offset:76 +; CHECK-NEXT: buffer_load_dword v55, v1, s[0:3], 0 offen offset:72 +; CHECK-NEXT: buffer_load_dword v64, v1, s[0:3], 0 offen offset:68 +; CHECK-NEXT: buffer_load_dword v65, v1, s[0:3], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v66, v1, s[0:3], 0 offen offset:60 +; CHECK-NEXT: buffer_load_dword v67, v1, s[0:3], 0 offen offset:56 +; CHECK-NEXT: buffer_load_dword v68, v1, s[0:3], 0 offen offset:52 +; CHECK-NEXT: buffer_load_dword v69, v1, s[0:3], 0 offen offset:48 +; CHECK-NEXT: buffer_load_dword v70, v1, s[0:3], 0 offen offset:44 +; CHECK-NEXT: buffer_load_dword v71, v1, s[0:3], 0 offen offset:40 +; CHECK-NEXT: buffer_load_dword v80, v1, s[0:3], 0 offen offset:36 +; CHECK-NEXT: buffer_load_dword v81, v1, s[0:3], 0 offen offset:32 +; CHECK-NEXT: buffer_load_dword v82, v1, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_load_dword v83, v1, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_load_dword v84, v1, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_load_dword v85, v1, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v86, v1, s[0:3], 0 offen offset:12 +; 
CHECK-NEXT: buffer_load_dword v87, v1, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v96, v1, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v97, v1, s[0:3], 0 offen +; CHECK-NEXT: v_add_nc_u32_e32 v1, 0xffffff00, v1 +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: s_waitcnt vmcnt(62) +; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:248 +; CHECK-NEXT: s_waitcnt vmcnt(61) +; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:244 +; CHECK-NEXT: s_waitcnt vmcnt(60) +; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(59) +; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:236 +; CHECK-NEXT: s_waitcnt vmcnt(58) +; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:232 +; CHECK-NEXT: s_waitcnt vmcnt(57) +; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:228 +; CHECK-NEXT: s_waitcnt vmcnt(56) +; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(55) +; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:220 +; CHECK-NEXT: s_waitcnt vmcnt(54) +; CHECK-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:216 +; CHECK-NEXT: s_waitcnt vmcnt(53) +; CHECK-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:212 +; CHECK-NEXT: s_waitcnt vmcnt(52) +; CHECK-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(51) +; CHECK-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:204 +; CHECK-NEXT: s_waitcnt vmcnt(50) +; CHECK-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:200 +; CHECK-NEXT: s_waitcnt vmcnt(49) +; CHECK-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:196 +; CHECK-NEXT: s_waitcnt vmcnt(48) +; CHECK-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(47) +; 
CHECK-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:188 +; CHECK-NEXT: s_waitcnt vmcnt(46) +; CHECK-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:184 +; CHECK-NEXT: s_waitcnt vmcnt(45) +; CHECK-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:180 +; CHECK-NEXT: s_waitcnt vmcnt(44) +; CHECK-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(43) +; CHECK-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:172 +; CHECK-NEXT: s_waitcnt vmcnt(42) +; CHECK-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:168 +; CHECK-NEXT: s_waitcnt vmcnt(41) +; CHECK-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:164 +; CHECK-NEXT: s_waitcnt vmcnt(40) +; CHECK-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(39) +; CHECK-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:156 +; CHECK-NEXT: s_waitcnt vmcnt(38) +; CHECK-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:152 +; CHECK-NEXT: s_waitcnt vmcnt(37) +; CHECK-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:148 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:140 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:136 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:132 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen offset:116 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; 
CHECK-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen offset:104 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen offset:100 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen offset:88 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen offset:84 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen offset:76 +; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: buffer_store_dword v64, v0, s[0:3], 0 offen offset:68 +; CHECK-NEXT: s_waitcnt vmcnt(16) +; CHECK-NEXT: buffer_store_dword v65, v0, s[0:3], 0 offen offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(15) +; CHECK-NEXT: buffer_store_dword v66, v0, s[0:3], 0 offen offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(14) +; CHECK-NEXT: buffer_store_dword v67, v0, s[0:3], 0 offen offset:56 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: buffer_store_dword v68, v0, s[0:3], 0 offen offset:52 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: buffer_store_dword v69, v0, s[0:3], 0 offen offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(11) +; CHECK-NEXT: buffer_store_dword v70, v0, s[0:3], 0 offen offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(10) +; CHECK-NEXT: buffer_store_dword v71, v0, s[0:3], 0 offen offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: 
buffer_store_dword v80, v0, s[0:3], 0 offen offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: buffer_store_dword v81, v0, s[0:3], 0 offen offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(7) +; CHECK-NEXT: buffer_store_dword v82, v0, s[0:3], 0 offen offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(6) +; CHECK-NEXT: buffer_store_dword v83, v0, s[0:3], 0 offen offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: buffer_store_dword v84, v0, s[0:3], 0 offen offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: buffer_store_dword v85, v0, s[0:3], 0 offen offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: buffer_store_dword v86, v0, s[0:3], 0 offen offset:12 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: buffer_store_dword v87, v0, s[0:3], 0 offen offset:8 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: buffer_store_dword v96, v0, s[0:3], 0 offen offset:4 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: buffer_store_dword v97, v0, s[0:3], 0 offen +; CHECK-NEXT: v_add_nc_u32_e32 v0, 0xffffff00, v0 +; CHECK-NEXT: s_cmp_eq_u64 s[4:5], 0 +; CHECK-NEXT: s_cbranch_scc0 .LBB8_5 +; CHECK-NEXT: .LBB8_6: ; %Flow19 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memmove_p5_p5_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_store_dword v47, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v72, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v73, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v74, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v75, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v79, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v89, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v92, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_store_dword v93, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v105, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v107, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v108, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v109, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v111, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v120, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v121, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v122, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b32 s4, exec_lo +; ALIGNED-NEXT: v_cmpx_ge_u32_e64 v1, v0 +; ALIGNED-NEXT: s_xor_b32 s6, exec_lo, s4 +; ALIGNED-NEXT: s_cbranch_execz .LBB8_3 +; ALIGNED-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0x800 +; ALIGNED-NEXT: .LBB8_2: ; 
%memmove_fwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: s_add_u32 s4, s4, 0xffffff00 +; ALIGNED-NEXT: s_addc_u32 s5, s5, -1 +; ALIGNED-NEXT: s_cmp_lg_u64 s[4:5], 0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:252 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x3e +; ALIGNED-NEXT: buffer_load_ubyte v123, v1, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: buffer_load_ubyte v122, v1, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: buffer_load_ubyte v121, v1, s[0:3], 0 offen 
offset:245 +; ALIGNED-NEXT: buffer_load_ubyte v120, v1, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: buffer_load_ubyte v111, v1, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: buffer_load_ubyte v110, v1, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: buffer_load_ubyte v109, v1, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: buffer_load_ubyte v108, v1, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: buffer_load_ubyte v107, v1, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: buffer_load_ubyte v106, v1, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: buffer_load_ubyte v105, v1, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: buffer_load_ubyte v104, v1, s[0:3], 0 offen offset:236 +; ALIGNED-NEXT: buffer_load_ubyte v95, v1, s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: buffer_load_ubyte v94, v1, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: buffer_load_ubyte v93, v1, s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: buffer_load_ubyte v92, v1, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: buffer_load_ubyte v91, v1, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: buffer_load_ubyte v90, v1, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: buffer_load_ubyte v89, v1, s[0:3], 0 offen offset:229 +; ALIGNED-NEXT: buffer_load_ubyte v88, v1, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: buffer_load_ubyte v79, v1, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: buffer_load_ubyte v78, v1, s[0:3], 0 offen offset:226 +; ALIGNED-NEXT: buffer_load_ubyte v77, v1, s[0:3], 0 offen offset:225 +; ALIGNED-NEXT: buffer_load_ubyte v76, v1, s[0:3], 0 offen offset:224 +; ALIGNED-NEXT: buffer_load_ubyte v75, v1, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: buffer_load_ubyte v74, v1, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: buffer_load_ubyte v73, v1, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: buffer_load_ubyte v72, v1, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: buffer_load_ubyte v63, v1, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: buffer_load_ubyte v62, v1, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: buffer_load_ubyte v61, v1, s[0:3], 0 offen 
offset:217 +; ALIGNED-NEXT: buffer_load_ubyte v60, v1, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: buffer_load_ubyte v59, v1, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: buffer_load_ubyte v58, v1, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: buffer_load_ubyte v57, v1, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: buffer_load_ubyte v56, v1, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: buffer_load_ubyte v47, v1, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: buffer_load_ubyte v46, v1, s[0:3], 0 offen offset:210 +; ALIGNED-NEXT: buffer_load_ubyte v45, v1, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: buffer_load_ubyte v44, v1, s[0:3], 0 offen offset:208 +; ALIGNED-NEXT: buffer_load_ubyte v43, v1, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: buffer_load_ubyte v42, v1, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: buffer_load_ubyte v41, v1, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: buffer_load_ubyte v40, v1, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: buffer_load_ubyte v119, v1, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: buffer_load_ubyte v118, v1, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: buffer_load_ubyte v117, v1, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: buffer_load_ubyte v116, v1, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: buffer_load_ubyte v115, v1, s[0:3], 0 offen offset:199 +; ALIGNED-NEXT: buffer_load_ubyte v114, v1, s[0:3], 0 offen offset:198 +; ALIGNED-NEXT: buffer_load_ubyte v113, v1, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_load_ubyte v112, v1, s[0:3], 0 offen offset:196 +; ALIGNED-NEXT: buffer_load_ubyte v103, v1, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_load_ubyte v102, v1, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_load_ubyte v101, v1, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_load_ubyte v100, v1, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_load_ubyte v99, v1, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_load_ubyte v98, v1, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_load_ubyte v97, v1, s[0:3], 0 offen 
offset:189 +; ALIGNED-NEXT: buffer_load_ubyte v96, v1, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_load_ubyte v87, v1, s[0:3], 0 offen offset:187 +; ALIGNED-NEXT: buffer_load_ubyte v86, v1, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: buffer_load_ubyte v85, v1, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: s_clause 0x3a +; ALIGNED-NEXT: buffer_load_ubyte v84, v1, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_load_ubyte v83, v1, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: buffer_load_ubyte v82, v1, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_load_ubyte v81, v1, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_load_ubyte v80, v1, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_load_ubyte v71, v1, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_load_ubyte v70, v1, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_load_ubyte v69, v1, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_load_ubyte v68, v1, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_load_ubyte v67, v1, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_load_ubyte v66, v1, s[0:3], 0 offen offset:174 +; ALIGNED-NEXT: buffer_load_ubyte v65, v1, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: buffer_load_ubyte v64, v1, s[0:3], 0 offen offset:172 +; ALIGNED-NEXT: buffer_load_ubyte v55, v1, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: buffer_load_ubyte v54, v1, s[0:3], 0 offen offset:170 +; ALIGNED-NEXT: buffer_load_ubyte v53, v1, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_load_ubyte v52, v1, s[0:3], 0 offen offset:168 +; ALIGNED-NEXT: buffer_load_ubyte v51, v1, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: buffer_load_ubyte v50, v1, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_load_ubyte v49, v1, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_load_ubyte v48, v1, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_load_ubyte v39, v1, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_load_ubyte v38, v1, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_load_ubyte v37, 
v1, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_load_ubyte v36, v1, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_load_ubyte v35, v1, s[0:3], 0 offen offset:159 +; ALIGNED-NEXT: buffer_load_ubyte v34, v1, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_load_ubyte v33, v1, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_load_ubyte v32, v1, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_load_ubyte v31, v1, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: buffer_load_ubyte v30, v1, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: buffer_load_ubyte v29, v1, s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_load_ubyte v28, v1, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_load_ubyte v27, v1, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: buffer_load_ubyte v26, v1, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_load_ubyte v25, v1, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: buffer_load_ubyte v24, v1, s[0:3], 0 offen offset:148 +; ALIGNED-NEXT: buffer_load_ubyte v23, v1, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_load_ubyte v22, v1, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: buffer_load_ubyte v21, v1, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: buffer_load_ubyte v20, v1, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: buffer_load_ubyte v19, v1, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: buffer_load_ubyte v18, v1, s[0:3], 0 offen offset:142 +; ALIGNED-NEXT: buffer_load_ubyte v17, v1, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_load_ubyte v16, v1, s[0:3], 0 offen offset:140 +; ALIGNED-NEXT: buffer_load_ubyte v15, v1, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_load_ubyte v14, v1, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_load_ubyte v13, v1, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: buffer_load_ubyte v12, v1, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: buffer_load_ubyte v11, v1, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_load_ubyte v9, v1, s[0:3], 
0 offen offset:133 +; ALIGNED-NEXT: buffer_load_ubyte v8, v1, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_load_ubyte v7, v1, s[0:3], 0 offen offset:131 +; ALIGNED-NEXT: buffer_load_ubyte v6, v1, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: buffer_load_ubyte v5, v1, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: buffer_load_ubyte v4, v1, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: buffer_load_ubyte v3, v1, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v127, v1, s[0:3], 0 offen offset:125 +; ALIGNED-NEXT: buffer_load_ubyte v126, v1, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: buffer_load_ubyte v125, v1, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: buffer_load_ubyte v124, v1, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen 
offset:116 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: s_waitcnt 
vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:89 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 
offset:536 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:77 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:67 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen 
offset:65 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:54 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], 
s32 offset:372 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:44 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:43 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:42 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:26 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 
offen offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: s_waitcnt 
vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:13 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, 
s[0:3], s32 offset:208 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:2 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen +; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x100, v1 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:252 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_byte v2, v0, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: buffer_store_byte v123, v0, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: buffer_store_byte v122, v0, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: buffer_store_byte v121, v0, s[0:3], 0 offen offset:245 +; ALIGNED-NEXT: buffer_store_byte v120, v0, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: buffer_store_byte v111, v0, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: buffer_store_byte v110, v0, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: buffer_store_byte v109, v0, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: buffer_store_byte v108, v0, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: buffer_store_byte v107, v0, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: buffer_store_byte v106, v0, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: buffer_store_byte v105, v0, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: buffer_store_byte v104, v0, s[0:3], 0 offen offset:236 +; ALIGNED-NEXT: buffer_store_byte v95, v0, s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: buffer_store_byte v94, v0, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: buffer_store_byte v93, v0, s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: buffer_store_byte v92, v0, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: buffer_store_byte v91, v0, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: buffer_store_byte v90, v0, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: buffer_store_byte v89, v0, s[0:3], 0 offen offset:229 +; ALIGNED-NEXT: buffer_store_byte v88, v0, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: buffer_store_byte v79, v0, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: buffer_store_byte v78, v0, s[0:3], 
0 offen offset:226 +; ALIGNED-NEXT: buffer_store_byte v77, v0, s[0:3], 0 offen offset:225 +; ALIGNED-NEXT: buffer_store_byte v76, v0, s[0:3], 0 offen offset:224 +; ALIGNED-NEXT: buffer_store_byte v75, v0, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: buffer_store_byte v74, v0, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: buffer_store_byte v73, v0, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: buffer_store_byte v72, v0, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: buffer_store_byte v63, v0, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: buffer_store_byte v62, v0, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: buffer_store_byte v61, v0, s[0:3], 0 offen offset:217 +; ALIGNED-NEXT: buffer_store_byte v60, v0, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: buffer_store_byte v59, v0, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: buffer_store_byte v58, v0, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: buffer_store_byte v57, v0, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: buffer_store_byte v56, v0, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: buffer_store_byte v47, v0, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: buffer_store_byte v46, v0, s[0:3], 0 offen offset:210 +; ALIGNED-NEXT: buffer_store_byte v45, v0, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: buffer_store_byte v44, v0, s[0:3], 0 offen offset:208 +; ALIGNED-NEXT: buffer_store_byte v43, v0, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: buffer_store_byte v42, v0, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: buffer_store_byte v41, v0, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: buffer_store_byte v40, v0, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: buffer_store_byte v119, v0, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: buffer_store_byte v118, v0, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: buffer_store_byte v117, v0, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: buffer_store_byte v116, v0, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: buffer_store_byte v115, v0, s[0:3], 0 offen offset:199 +; ALIGNED-NEXT: buffer_store_byte v114, v0, s[0:3], 0 
offen offset:198 +; ALIGNED-NEXT: buffer_store_byte v113, v0, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_store_byte v112, v0, s[0:3], 0 offen offset:196 +; ALIGNED-NEXT: buffer_store_byte v103, v0, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_store_byte v102, v0, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_store_byte v101, v0, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_store_byte v100, v0, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_store_byte v99, v0, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_store_byte v98, v0, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_store_byte v97, v0, s[0:3], 0 offen offset:189 +; ALIGNED-NEXT: buffer_store_byte v96, v0, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_store_byte v87, v0, s[0:3], 0 offen offset:187 +; ALIGNED-NEXT: buffer_store_byte v86, v0, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: buffer_store_byte v85, v0, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: buffer_store_byte v84, v0, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_store_byte v83, v0, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: buffer_store_byte v82, v0, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_store_byte v81, v0, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_store_byte v80, v0, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_store_byte v71, v0, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_store_byte v70, v0, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_store_byte v69, v0, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_store_byte v68, v0, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_store_byte v67, v0, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_store_byte v66, v0, s[0:3], 0 offen offset:174 +; ALIGNED-NEXT: buffer_store_byte v65, v0, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: buffer_store_byte v64, v0, s[0:3], 0 offen offset:172 +; ALIGNED-NEXT: buffer_store_byte v55, v0, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: buffer_store_byte v54, v0, s[0:3], 0 offen 
offset:170 +; ALIGNED-NEXT: buffer_store_byte v53, v0, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_store_byte v52, v0, s[0:3], 0 offen offset:168 +; ALIGNED-NEXT: buffer_store_byte v51, v0, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: buffer_store_byte v50, v0, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_store_byte v49, v0, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_store_byte v48, v0, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_store_byte v39, v0, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_store_byte v38, v0, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_store_byte v37, v0, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_store_byte v36, v0, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_store_byte v35, v0, s[0:3], 0 offen offset:159 +; ALIGNED-NEXT: buffer_store_byte v34, v0, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_store_byte v33, v0, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_store_byte v32, v0, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_store_byte v31, v0, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: buffer_store_byte v30, v0, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: buffer_store_byte v29, v0, s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_store_byte v28, v0, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_store_byte v27, v0, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: buffer_store_byte v26, v0, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_store_byte v25, v0, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: buffer_store_byte v24, v0, s[0:3], 0 offen offset:148 +; ALIGNED-NEXT: buffer_store_byte v23, v0, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_store_byte v22, v0, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: buffer_store_byte v21, v0, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: buffer_store_byte v20, v0, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: buffer_store_byte v19, v0, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: buffer_store_byte v18, v0, s[0:3], 0 offen offset:142 
+; ALIGNED-NEXT: buffer_store_byte v17, v0, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_store_byte v16, v0, s[0:3], 0 offen offset:140 +; ALIGNED-NEXT: buffer_store_byte v15, v0, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_store_byte v14, v0, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_store_byte v13, v0, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: buffer_store_byte v12, v0, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: buffer_store_byte v11, v0, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:133 +; ALIGNED-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_store_byte v7, v0, s[0:3], 0 offen offset:131 +; ALIGNED-NEXT: buffer_store_byte v6, v0, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: buffer_store_byte v5, v0, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: buffer_store_byte v4, v0, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: buffer_store_byte v127, v0, s[0:3], 0 offen offset:125 +; ALIGNED-NEXT: buffer_store_byte v126, v0, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: buffer_store_byte v125, v0, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: buffer_store_byte v124, v0, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 
offset:668 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:116 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload +; 
ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:108 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 
offen offset:89 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: 
buffer_load_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:77 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:464 
; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:67 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:65 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt 
vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:54 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, 
v0, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:44 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:43 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:42 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: 
buffer_load_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:300 
; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:26 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:24 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt 
vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:13 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, 
s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:2 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen +; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x100, v0 +; ALIGNED-NEXT: s_cbranch_scc1 .LBB8_2 +; ALIGNED-NEXT: .LBB8_3: ; %Flow18 +; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6 +; ALIGNED-NEXT: s_cbranch_execz .LBB8_6 +; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader +; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0 +; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1 +; ALIGNED-NEXT: s_movk_i32 s4, 0xf800 +; ALIGNED-NEXT: s_mov_b32 s5, -1 +; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: 
s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: s_cmp_eq_u64 s[4:5], 0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:252 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill +; 
ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:245 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:236 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, 
s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:229 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:226 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:225 +; 
ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:224 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:217 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:210 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:208 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, 
off, s[0:3], s32 offset:512 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x3e +; ALIGNED-NEXT: buffer_load_ubyte v115, v1, s[0:3], 0 offen offset:199 +; ALIGNED-NEXT: buffer_load_ubyte v114, v1, s[0:3], 0 offen offset:198 +; ALIGNED-NEXT: buffer_load_ubyte v113, v1, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_load_ubyte v112, v1, s[0:3], 0 offen offset:196 +; ALIGNED-NEXT: buffer_load_ubyte v103, v1, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_load_ubyte v102, v1, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_load_ubyte v101, v1, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_load_ubyte v100, v1, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_load_ubyte v99, v1, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_load_ubyte v98, v1, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_load_ubyte v97, v1, s[0:3], 0 offen offset:189 +; ALIGNED-NEXT: buffer_load_ubyte v96, v1, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_load_ubyte v87, v1, s[0:3], 0 offen offset:187 +; 
ALIGNED-NEXT: buffer_load_ubyte v86, v1, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: buffer_load_ubyte v85, v1, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: buffer_load_ubyte v84, v1, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_load_ubyte v83, v1, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: buffer_load_ubyte v82, v1, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_load_ubyte v81, v1, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_load_ubyte v80, v1, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_load_ubyte v71, v1, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_load_ubyte v70, v1, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_load_ubyte v69, v1, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_load_ubyte v68, v1, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_load_ubyte v67, v1, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_load_ubyte v66, v1, s[0:3], 0 offen offset:174 +; ALIGNED-NEXT: buffer_load_ubyte v65, v1, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: buffer_load_ubyte v64, v1, s[0:3], 0 offen offset:172 +; ALIGNED-NEXT: buffer_load_ubyte v55, v1, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: buffer_load_ubyte v54, v1, s[0:3], 0 offen offset:170 +; ALIGNED-NEXT: buffer_load_ubyte v53, v1, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_load_ubyte v52, v1, s[0:3], 0 offen offset:168 +; ALIGNED-NEXT: buffer_load_ubyte v51, v1, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: buffer_load_ubyte v50, v1, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_load_ubyte v49, v1, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_load_ubyte v48, v1, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_load_ubyte v39, v1, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_load_ubyte v38, v1, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_load_ubyte v37, v1, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_load_ubyte v36, v1, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_load_ubyte v35, v1, s[0:3], 0 offen offset:159 +; 
ALIGNED-NEXT: buffer_load_ubyte v34, v1, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_load_ubyte v33, v1, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_load_ubyte v32, v1, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_load_ubyte v31, v1, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: buffer_load_ubyte v30, v1, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: buffer_load_ubyte v29, v1, s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_load_ubyte v28, v1, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_load_ubyte v27, v1, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: buffer_load_ubyte v26, v1, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_load_ubyte v25, v1, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: buffer_load_ubyte v24, v1, s[0:3], 0 offen offset:148 +; ALIGNED-NEXT: buffer_load_ubyte v23, v1, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_load_ubyte v22, v1, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: buffer_load_ubyte v21, v1, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: buffer_load_ubyte v20, v1, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: buffer_load_ubyte v19, v1, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: buffer_load_ubyte v18, v1, s[0:3], 0 offen offset:142 +; ALIGNED-NEXT: buffer_load_ubyte v17, v1, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_load_ubyte v16, v1, s[0:3], 0 offen offset:140 +; ALIGNED-NEXT: buffer_load_ubyte v15, v1, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_load_ubyte v14, v1, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_load_ubyte v13, v1, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: s_clause 0xa +; ALIGNED-NEXT: buffer_load_ubyte v12, v1, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: buffer_load_ubyte v11, v1, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_load_ubyte v9, v1, s[0:3], 0 offen offset:133 +; ALIGNED-NEXT: buffer_load_ubyte v8, v1, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_load_ubyte v7, v1, s[0:3], 0 
offen offset:131 +; ALIGNED-NEXT: buffer_load_ubyte v6, v1, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: buffer_load_ubyte v5, v1, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: buffer_load_ubyte v4, v1, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: buffer_load_ubyte v3, v1, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x34 +; ALIGNED-NEXT: buffer_load_ubyte v127, v1, s[0:3], 0 offen offset:125 +; ALIGNED-NEXT: buffer_load_ubyte v126, v1, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: buffer_load_ubyte v125, v1, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: buffer_load_ubyte v124, v1, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: buffer_load_ubyte v123, v1, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: buffer_load_ubyte v122, v1, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: buffer_load_ubyte v121, v1, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: buffer_load_ubyte v120, v1, s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: buffer_load_ubyte v111, v1, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: buffer_load_ubyte v110, v1, s[0:3], 0 offen offset:116 +; ALIGNED-NEXT: buffer_load_ubyte v109, v1, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: buffer_load_ubyte v108, v1, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: buffer_load_ubyte v107, v1, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: buffer_load_ubyte v106, v1, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: buffer_load_ubyte v105, v1, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: buffer_load_ubyte v104, v1, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: buffer_load_ubyte v95, v1, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: buffer_load_ubyte v94, v1, s[0:3], 0 offen offset:108 +; ALIGNED-NEXT: buffer_load_ubyte v93, v1, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: buffer_load_ubyte v92, v1, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: buffer_load_ubyte v91, 
v1, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: buffer_load_ubyte v90, v1, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: buffer_load_ubyte v89, v1, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: buffer_load_ubyte v88, v1, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: buffer_load_ubyte v79, v1, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: buffer_load_ubyte v78, v1, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: buffer_load_ubyte v77, v1, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: buffer_load_ubyte v76, v1, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: buffer_load_ubyte v75, v1, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: buffer_load_ubyte v74, v1, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: buffer_load_ubyte v73, v1, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: buffer_load_ubyte v72, v1, s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: buffer_load_ubyte v63, v1, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: buffer_load_ubyte v62, v1, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: buffer_load_ubyte v61, v1, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: buffer_load_ubyte v60, v1, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: buffer_load_ubyte v59, v1, s[0:3], 0 offen offset:89 +; ALIGNED-NEXT: buffer_load_ubyte v58, v1, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: buffer_load_ubyte v57, v1, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: buffer_load_ubyte v56, v1, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: buffer_load_ubyte v47, v1, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: buffer_load_ubyte v46, v1, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: buffer_load_ubyte v45, v1, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: buffer_load_ubyte v44, v1, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: buffer_load_ubyte v43, v1, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: buffer_load_ubyte v42, v1, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: buffer_load_ubyte v41, v1, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: buffer_load_ubyte v40, v1, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: buffer_load_ubyte v119, v1, s[0:3], 0 offen offset:77 +; 
ALIGNED-NEXT: buffer_load_ubyte v118, v1, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: buffer_load_ubyte v117, v1, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: buffer_load_ubyte v116, v1, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:67 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:65 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte 
v2, v1, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:54 +; 
ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:44 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:43 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:42 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 
offset:328 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:26 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:24 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen 
offset:13 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:2 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v2, v1, s[0:3], 0 offen +; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0xffffff00, v1 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:252 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload +; ALIGNED-NEXT: 
s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:245 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_byte v2, v0, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:236 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 
offen offset:229 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:226 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:225 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:224 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: 
buffer_load_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:217 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:210 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 
offset:524 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:208 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: buffer_store_byte v115, v0, s[0:3], 0 offen offset:199 +; ALIGNED-NEXT: buffer_store_byte v114, v0, s[0:3], 0 offen offset:198 +; ALIGNED-NEXT: buffer_store_byte v113, v0, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_store_byte v112, v0, s[0:3], 0 offen offset:196 +; 
ALIGNED-NEXT: buffer_store_byte v103, v0, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_store_byte v102, v0, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_store_byte v101, v0, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_store_byte v100, v0, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_store_byte v99, v0, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_store_byte v98, v0, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_store_byte v97, v0, s[0:3], 0 offen offset:189 +; ALIGNED-NEXT: buffer_store_byte v96, v0, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_store_byte v87, v0, s[0:3], 0 offen offset:187 +; ALIGNED-NEXT: buffer_store_byte v86, v0, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: buffer_store_byte v85, v0, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: buffer_store_byte v84, v0, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_store_byte v83, v0, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: buffer_store_byte v82, v0, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_store_byte v81, v0, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_store_byte v80, v0, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_store_byte v71, v0, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_store_byte v70, v0, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_store_byte v69, v0, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_store_byte v68, v0, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_store_byte v67, v0, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_store_byte v66, v0, s[0:3], 0 offen offset:174 +; ALIGNED-NEXT: buffer_store_byte v65, v0, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: buffer_store_byte v64, v0, s[0:3], 0 offen offset:172 +; ALIGNED-NEXT: buffer_store_byte v55, v0, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: buffer_store_byte v54, v0, s[0:3], 0 offen offset:170 +; ALIGNED-NEXT: buffer_store_byte v53, v0, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_store_byte v52, v0, s[0:3], 0 offen offset:168 +; 
ALIGNED-NEXT: buffer_store_byte v51, v0, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: buffer_store_byte v50, v0, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_store_byte v49, v0, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_store_byte v48, v0, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_store_byte v39, v0, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_store_byte v38, v0, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_store_byte v37, v0, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_store_byte v36, v0, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_store_byte v35, v0, s[0:3], 0 offen offset:159 +; ALIGNED-NEXT: buffer_store_byte v34, v0, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_store_byte v33, v0, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_store_byte v32, v0, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_store_byte v31, v0, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: buffer_store_byte v30, v0, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: buffer_store_byte v29, v0, s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_store_byte v28, v0, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_store_byte v27, v0, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: buffer_store_byte v26, v0, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_store_byte v25, v0, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: buffer_store_byte v24, v0, s[0:3], 0 offen offset:148 +; ALIGNED-NEXT: buffer_store_byte v23, v0, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_store_byte v22, v0, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: buffer_store_byte v21, v0, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: buffer_store_byte v20, v0, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: buffer_store_byte v19, v0, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: buffer_store_byte v18, v0, s[0:3], 0 offen offset:142 +; ALIGNED-NEXT: buffer_store_byte v17, v0, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_store_byte v16, v0, s[0:3], 0 offen offset:140 +; 
ALIGNED-NEXT: buffer_store_byte v15, v0, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_store_byte v14, v0, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_store_byte v13, v0, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: buffer_store_byte v12, v0, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: buffer_store_byte v11, v0, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:133 +; ALIGNED-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_store_byte v7, v0, s[0:3], 0 offen offset:131 +; ALIGNED-NEXT: buffer_store_byte v6, v0, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: buffer_store_byte v5, v0, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: buffer_store_byte v4, v0, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: buffer_store_byte v127, v0, s[0:3], 0 offen offset:125 +; ALIGNED-NEXT: buffer_store_byte v126, v0, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: buffer_store_byte v125, v0, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: buffer_store_byte v124, v0, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: buffer_store_byte v123, v0, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: buffer_store_byte v122, v0, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: buffer_store_byte v121, v0, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: buffer_store_byte v120, v0, s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: buffer_store_byte v111, v0, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: buffer_store_byte v110, v0, s[0:3], 0 offen offset:116 +; ALIGNED-NEXT: buffer_store_byte v109, v0, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: buffer_store_byte v108, v0, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: 
buffer_store_byte v107, v0, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: buffer_store_byte v106, v0, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: buffer_store_byte v105, v0, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: buffer_store_byte v104, v0, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: buffer_store_byte v95, v0, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: buffer_store_byte v94, v0, s[0:3], 0 offen offset:108 +; ALIGNED-NEXT: buffer_store_byte v93, v0, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: buffer_store_byte v92, v0, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: buffer_store_byte v91, v0, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: buffer_store_byte v90, v0, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: buffer_store_byte v89, v0, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: buffer_store_byte v88, v0, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: buffer_store_byte v79, v0, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: buffer_store_byte v78, v0, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: buffer_store_byte v77, v0, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: buffer_store_byte v76, v0, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: buffer_store_byte v75, v0, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: buffer_store_byte v74, v0, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: buffer_store_byte v73, v0, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: buffer_store_byte v72, v0, s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: buffer_store_byte v63, v0, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: buffer_store_byte v62, v0, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: buffer_store_byte v61, v0, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: buffer_store_byte v60, v0, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: buffer_store_byte v59, v0, s[0:3], 0 offen offset:89 +; ALIGNED-NEXT: buffer_store_byte v58, v0, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: buffer_store_byte v57, v0, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: buffer_store_byte v56, v0, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: buffer_store_byte 
v47, v0, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: buffer_store_byte v46, v0, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: buffer_store_byte v45, v0, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: buffer_store_byte v44, v0, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: buffer_store_byte v43, v0, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: buffer_store_byte v42, v0, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: buffer_store_byte v41, v0, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: buffer_store_byte v40, v0, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: buffer_store_byte v119, v0, s[0:3], 0 offen offset:77 +; ALIGNED-NEXT: buffer_store_byte v118, v0, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: buffer_store_byte v117, v0, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: buffer_store_byte v116, v0, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: 
buffer_load_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:67 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:65 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:420 
; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:54 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt 
vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:44 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:43 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:42 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, 
v0, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: 
buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:26 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:24 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:256 
; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:13 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt 
vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:2 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen +; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0xffffff00, v0 +; ALIGNED-NEXT: s_cbranch_scc0 .LBB8_5 +; ALIGNED-NEXT: .LBB8_6: ; %Flow19 +; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s6 +; ALIGNED-NEXT: s_clause 0x2f +; ALIGNED-NEXT: buffer_load_dword v127, off, s[0:3], s32 +; ALIGNED-NEXT: buffer_load_dword v126, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_load_dword v125, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_load_dword v124, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_load_dword v123, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: buffer_load_dword v122, off, s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_load_dword v121, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_load_dword v120, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: 
buffer_load_dword v111, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: buffer_load_dword v110, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_load_dword v109, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_load_dword v108, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_load_dword v107, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: buffer_load_dword v106, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_load_dword v105, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_load_dword v104, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_load_dword v95, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: buffer_load_dword v94, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_load_dword v93, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_load_dword v92, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_load_dword v91, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: buffer_load_dword v90, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_load_dword v89, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_load_dword v88, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_load_dword v79, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: buffer_load_dword v78, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_load_dword v77, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_load_dword v76, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_load_dword v75, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: buffer_load_dword v74, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_load_dword v73, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_load_dword v72, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:136 +; ALIGNED-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: buffer_load_dword v58, off, s[0:3], 
s32 offset:148 +; ALIGNED-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memmove_p5_p5_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: s_mov_b32 s4, exec_lo +; UNROLL3-NEXT: v_cmpx_ge_u32_e64 v1, v0 +; UNROLL3-NEXT: s_xor_b32 s6, exec_lo, s4 +; UNROLL3-NEXT: s_cbranch_execz .LBB8_4 +; UNROLL3-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; UNROLL3-NEXT: v_mov_b32_e32 v2, v1 +; UNROLL3-NEXT: v_mov_b32_e32 v3, v0 +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0x7e0 +; UNROLL3-NEXT: .LBB8_2: ; %memmove_fwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: s_clause 0xb +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:28 +; UNROLL3-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 +; UNROLL3-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: buffer_load_dword v11, v2, s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: buffer_load_dword v12, v2, s[0:3], 0 offen offset:12 +; 
UNROLL3-NEXT: buffer_load_dword v13, v2, s[0:3], 0 offen offset:8 +; UNROLL3-NEXT: buffer_load_dword v14, v2, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: buffer_load_dword v15, v2, s[0:3], 0 offen +; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 48, v2 +; UNROLL3-NEXT: s_add_u32 s4, s4, 0xffffffd0 +; UNROLL3-NEXT: s_addc_u32 s5, s5, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(11) +; UNROLL3-NEXT: buffer_store_dword v4, v3, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: s_waitcnt vmcnt(10) +; UNROLL3-NEXT: buffer_store_dword v5, v3, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: s_waitcnt vmcnt(9) +; UNROLL3-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: s_waitcnt vmcnt(8) +; UNROLL3-NEXT: buffer_store_dword v7, v3, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: s_waitcnt vmcnt(7) +; UNROLL3-NEXT: buffer_store_dword v8, v3, s[0:3], 0 offen offset:28 +; UNROLL3-NEXT: s_waitcnt vmcnt(6) +; UNROLL3-NEXT: buffer_store_dword v9, v3, s[0:3], 0 offen offset:24 +; UNROLL3-NEXT: s_waitcnt vmcnt(5) +; UNROLL3-NEXT: buffer_store_dword v10, v3, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: s_waitcnt vmcnt(4) +; UNROLL3-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v12, v3, s[0:3], 0 offen offset:12 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v13, v3, s[0:3], 0 offen offset:8 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v14, v3, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v15, v3, s[0:3], 0 offen +; UNROLL3-NEXT: v_add_nc_u32_e32 v3, 48, v3 +; UNROLL3-NEXT: s_cmp_lg_u64 s[4:5], 0 +; UNROLL3-NEXT: s_cbranch_scc1 .LBB8_2 +; UNROLL3-NEXT: ; %bb.3: ; %memmove_fwd_residual +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen 
offset:2020 +; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:2020 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:2044 +; UNROLL3-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:2044 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: ; implicit-def: $vgpr1 +; UNROLL3-NEXT: ; implicit-def: $vgpr0 +; UNROLL3-NEXT: .LBB8_4: ; %Flow16 +; UNROLL3-NEXT: s_andn2_saveexec_b32 s6, s6 +; UNROLL3-NEXT: s_cbranch_execz .LBB8_7 +; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:2044 +; UNROLL3-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: s_movk_i32 s4, 0xf820 +; UNROLL3-NEXT: s_mov_b32 s5, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 
offset:2044 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020 +; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0 +; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:2020 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: .LBB8_6: ; %memmove_bwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: s_clause 0xb +; UNROLL3-NEXT: buffer_load_dword v0, v1, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:28 +; UNROLL3-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:24 +; UNROLL3-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: buffer_load_dword v10, v1, s[0:3], 0 offen offset:12 +; UNROLL3-NEXT: buffer_load_dword v11, v1, s[0:3], 0 offen offset:8 
+; UNROLL3-NEXT: buffer_load_dword v12, v1, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: buffer_load_dword v13, v1, s[0:3], 0 offen +; UNROLL3-NEXT: v_subrev_nc_u32_e32 v1, 48, v1 +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: s_waitcnt vmcnt(11) +; UNROLL3-NEXT: buffer_store_dword v0, v2, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: s_waitcnt vmcnt(10) +; UNROLL3-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: s_waitcnt vmcnt(9) +; UNROLL3-NEXT: buffer_store_dword v4, v2, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: s_waitcnt vmcnt(8) +; UNROLL3-NEXT: buffer_store_dword v5, v2, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: s_waitcnt vmcnt(7) +; UNROLL3-NEXT: buffer_store_dword v6, v2, s[0:3], 0 offen offset:28 +; UNROLL3-NEXT: s_waitcnt vmcnt(6) +; UNROLL3-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen offset:24 +; UNROLL3-NEXT: s_waitcnt vmcnt(5) +; UNROLL3-NEXT: buffer_store_dword v8, v2, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: s_waitcnt vmcnt(4) +; UNROLL3-NEXT: buffer_store_dword v9, v2, s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: s_waitcnt vmcnt(3) +; UNROLL3-NEXT: buffer_store_dword v10, v2, s[0:3], 0 offen offset:12 +; UNROLL3-NEXT: s_waitcnt vmcnt(2) +; UNROLL3-NEXT: buffer_store_dword v11, v2, s[0:3], 0 offen offset:8 +; UNROLL3-NEXT: s_waitcnt vmcnt(1) +; UNROLL3-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen +; UNROLL3-NEXT: v_subrev_nc_u32_e32 v2, 48, v2 +; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], 0 +; UNROLL3-NEXT: s_cbranch_scc0 .LBB8_6 +; UNROLL3-NEXT: .LBB8_7: ; %Flow17 +; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s6 +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + +define void @memmove_p0_p5_sz2048(ptr addrspace(0) 
align 1 %dst, ptr addrspace(5) align 1 readonly %src) { +; CHECK-LABEL: memmove_p0_p5_sz2048: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1] +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: s_mov_b32 s6, exec_lo +; CHECK-NEXT: v_cndmask_b32_e32 v3, -1, v0, vcc_lo +; CHECK-NEXT: v_cmpx_ge_u32_e64 v2, v3 +; CHECK-NEXT: s_xor_b32 s6, exec_lo, s6 +; CHECK-NEXT: s_cbranch_execz .LBB9_2 +; CHECK-NEXT: .LBB9_1: ; %memmove_fwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_clause 0x3e +; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:32 +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:36 +; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:40 +; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:44 +; CHECK-NEXT: buffer_load_dword v11, v2, s[0:3], 0 offen offset:48 +; CHECK-NEXT: buffer_load_dword v12, v2, s[0:3], 0 offen offset:52 +; CHECK-NEXT: buffer_load_dword v13, v2, s[0:3], 0 offen offset:56 +; CHECK-NEXT: buffer_load_dword v14, v2, s[0:3], 0 offen offset:60 +; CHECK-NEXT: buffer_load_dword v18, v2, s[0:3], 0 offen offset:92 +; CHECK-NEXT: buffer_load_dword v17, v2, s[0:3], 0 offen offset:88 +; CHECK-NEXT: buffer_load_dword v16, v2, s[0:3], 0 offen offset:84 +; CHECK-NEXT: buffer_load_dword v15, v2, s[0:3], 0 offen offset:80 +; CHECK-NEXT: buffer_load_dword v22, v2, s[0:3], 0 offen offset:124 +; CHECK-NEXT: buffer_load_dword v21, v2, s[0:3], 0 offen offset:120 +; CHECK-NEXT: buffer_load_dword v20, v2, s[0:3], 0 offen offset:116 +; CHECK-NEXT: buffer_load_dword v19, v2, s[0:3], 0 offen offset:112 +; CHECK-NEXT: buffer_load_dword v26, v2, 
s[0:3], 0 offen offset:108 +; CHECK-NEXT: buffer_load_dword v25, v2, s[0:3], 0 offen offset:104 +; CHECK-NEXT: buffer_load_dword v24, v2, s[0:3], 0 offen offset:100 +; CHECK-NEXT: buffer_load_dword v23, v2, s[0:3], 0 offen offset:96 +; CHECK-NEXT: buffer_load_dword v30, v2, s[0:3], 0 offen offset:156 +; CHECK-NEXT: buffer_load_dword v29, v2, s[0:3], 0 offen offset:152 +; CHECK-NEXT: buffer_load_dword v28, v2, s[0:3], 0 offen offset:148 +; CHECK-NEXT: buffer_load_dword v27, v2, s[0:3], 0 offen offset:144 +; CHECK-NEXT: buffer_load_dword v34, v2, s[0:3], 0 offen offset:188 +; CHECK-NEXT: buffer_load_dword v33, v2, s[0:3], 0 offen offset:184 +; CHECK-NEXT: buffer_load_dword v32, v2, s[0:3], 0 offen offset:180 +; CHECK-NEXT: buffer_load_dword v31, v2, s[0:3], 0 offen offset:176 +; CHECK-NEXT: buffer_load_dword v38, v2, s[0:3], 0 offen offset:172 +; CHECK-NEXT: buffer_load_dword v37, v2, s[0:3], 0 offen offset:168 +; CHECK-NEXT: buffer_load_dword v36, v2, s[0:3], 0 offen offset:164 +; CHECK-NEXT: buffer_load_dword v35, v2, s[0:3], 0 offen offset:160 +; CHECK-NEXT: buffer_load_dword v51, v2, s[0:3], 0 offen offset:220 +; CHECK-NEXT: buffer_load_dword v50, v2, s[0:3], 0 offen offset:216 +; CHECK-NEXT: buffer_load_dword v49, v2, s[0:3], 0 offen offset:212 +; CHECK-NEXT: buffer_load_dword v48, v2, s[0:3], 0 offen offset:208 +; CHECK-NEXT: buffer_load_dword v55, v2, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_load_dword v54, v2, s[0:3], 0 offen offset:248 +; CHECK-NEXT: buffer_load_dword v53, v2, s[0:3], 0 offen offset:244 +; CHECK-NEXT: buffer_load_dword v52, v2, s[0:3], 0 offen offset:240 +; CHECK-NEXT: buffer_load_dword v67, v2, s[0:3], 0 offen offset:236 +; CHECK-NEXT: buffer_load_dword v66, v2, s[0:3], 0 offen offset:232 +; CHECK-NEXT: buffer_load_dword v65, v2, s[0:3], 0 offen offset:228 +; CHECK-NEXT: buffer_load_dword v64, v2, s[0:3], 0 offen offset:224 +; CHECK-NEXT: buffer_load_dword v71, v2, s[0:3], 0 offen offset:204 +; CHECK-NEXT: buffer_load_dword v70, v2, 
s[0:3], 0 offen offset:200 +; CHECK-NEXT: buffer_load_dword v69, v2, s[0:3], 0 offen offset:196 +; CHECK-NEXT: buffer_load_dword v68, v2, s[0:3], 0 offen offset:192 +; CHECK-NEXT: buffer_load_dword v83, v2, s[0:3], 0 offen offset:140 +; CHECK-NEXT: buffer_load_dword v82, v2, s[0:3], 0 offen offset:136 +; CHECK-NEXT: buffer_load_dword v81, v2, s[0:3], 0 offen offset:132 +; CHECK-NEXT: buffer_load_dword v80, v2, s[0:3], 0 offen offset:128 +; CHECK-NEXT: buffer_load_dword v87, v2, s[0:3], 0 offen offset:76 +; CHECK-NEXT: buffer_load_dword v86, v2, s[0:3], 0 offen offset:72 +; CHECK-NEXT: buffer_load_dword v85, v2, s[0:3], 0 offen offset:68 +; CHECK-NEXT: buffer_load_dword v84, v2, s[0:3], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v96, v2, s[0:3], 0 offen +; CHECK-NEXT: buffer_load_dword v97, v2, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v98, v2, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v99, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: s_add_u32 s4, s4, 0x100 +; CHECK-NEXT: v_add_nc_u32_e32 v2, 0x100, v2 +; CHECK-NEXT: s_addc_u32 s5, s5, 0 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(16) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[64:67] offset:224 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(12) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:192 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[31:34] offset:176 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[35:38] offset:160 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[27:30] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(8) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:128 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[19:22] offset:112 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[23:26] 
offset:96 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[15:18] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] offset:64 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[11:14] offset:48 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[7:10] offset:32 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[3:6] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] +; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800 +; CHECK-NEXT: s_cbranch_scc1 .LBB9_1 +; CHECK-NEXT: .LBB9_2: ; %Flow10 +; CHECK-NEXT: s_andn2_saveexec_b32 s8, s6 +; CHECK-NEXT: s_cbranch_execz .LBB9_5 +; CHECK-NEXT: ; %bb.3: ; %memmove_bwd_loop.preheader +; CHECK-NEXT: v_add_nc_u32_e32 v2, 0x700, v2 +; CHECK-NEXT: s_movk_i32 s6, 0xff00 +; CHECK-NEXT: s_mov_b64 s[4:5], 0x700 +; CHECK-NEXT: s_mov_b32 s7, -1 +; CHECK-NEXT: .LBB9_4: ; %memmove_bwd_loop +; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: s_clause 0x3e +; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20 +; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24 +; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28 +; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:32 +; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:36 +; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:40 +; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:44 +; CHECK-NEXT: buffer_load_dword v11, v2, s[0:3], 0 offen offset:48 +; CHECK-NEXT: buffer_load_dword v12, v2, s[0:3], 0 offen offset:52 +; CHECK-NEXT: buffer_load_dword v13, v2, s[0:3], 0 offen offset:56 +; CHECK-NEXT: buffer_load_dword v14, v2, s[0:3], 0 offen offset:60 +; CHECK-NEXT: buffer_load_dword v18, v2, s[0:3], 0 offen offset:124 +; CHECK-NEXT: buffer_load_dword v17, v2, s[0:3], 0 offen offset:120 +; CHECK-NEXT: buffer_load_dword v16, v2, s[0:3], 0 offen offset:116 +; CHECK-NEXT: buffer_load_dword v15, v2, s[0:3], 0 offen 
offset:112 +; CHECK-NEXT: buffer_load_dword v22, v2, s[0:3], 0 offen offset:108 +; CHECK-NEXT: buffer_load_dword v21, v2, s[0:3], 0 offen offset:104 +; CHECK-NEXT: buffer_load_dword v20, v2, s[0:3], 0 offen offset:100 +; CHECK-NEXT: buffer_load_dword v19, v2, s[0:3], 0 offen offset:96 +; CHECK-NEXT: buffer_load_dword v26, v2, s[0:3], 0 offen offset:252 +; CHECK-NEXT: buffer_load_dword v25, v2, s[0:3], 0 offen offset:248 +; CHECK-NEXT: buffer_load_dword v24, v2, s[0:3], 0 offen offset:244 +; CHECK-NEXT: buffer_load_dword v23, v2, s[0:3], 0 offen offset:240 +; CHECK-NEXT: buffer_load_dword v30, v2, s[0:3], 0 offen offset:236 +; CHECK-NEXT: buffer_load_dword v29, v2, s[0:3], 0 offen offset:232 +; CHECK-NEXT: buffer_load_dword v28, v2, s[0:3], 0 offen offset:228 +; CHECK-NEXT: buffer_load_dword v27, v2, s[0:3], 0 offen offset:224 +; CHECK-NEXT: buffer_load_dword v34, v2, s[0:3], 0 offen offset:220 +; CHECK-NEXT: buffer_load_dword v33, v2, s[0:3], 0 offen offset:216 +; CHECK-NEXT: buffer_load_dword v32, v2, s[0:3], 0 offen offset:212 +; CHECK-NEXT: buffer_load_dword v31, v2, s[0:3], 0 offen offset:208 +; CHECK-NEXT: buffer_load_dword v38, v2, s[0:3], 0 offen offset:204 +; CHECK-NEXT: buffer_load_dword v37, v2, s[0:3], 0 offen offset:200 +; CHECK-NEXT: buffer_load_dword v36, v2, s[0:3], 0 offen offset:196 +; CHECK-NEXT: buffer_load_dword v35, v2, s[0:3], 0 offen offset:192 +; CHECK-NEXT: buffer_load_dword v51, v2, s[0:3], 0 offen offset:188 +; CHECK-NEXT: buffer_load_dword v50, v2, s[0:3], 0 offen offset:184 +; CHECK-NEXT: buffer_load_dword v49, v2, s[0:3], 0 offen offset:180 +; CHECK-NEXT: buffer_load_dword v48, v2, s[0:3], 0 offen offset:176 +; CHECK-NEXT: buffer_load_dword v55, v2, s[0:3], 0 offen offset:172 +; CHECK-NEXT: buffer_load_dword v54, v2, s[0:3], 0 offen offset:168 +; CHECK-NEXT: buffer_load_dword v53, v2, s[0:3], 0 offen offset:164 +; CHECK-NEXT: buffer_load_dword v52, v2, s[0:3], 0 offen offset:160 +; CHECK-NEXT: buffer_load_dword v67, v2, s[0:3], 0 offen 
offset:156 +; CHECK-NEXT: buffer_load_dword v66, v2, s[0:3], 0 offen offset:152 +; CHECK-NEXT: buffer_load_dword v65, v2, s[0:3], 0 offen offset:148 +; CHECK-NEXT: buffer_load_dword v64, v2, s[0:3], 0 offen offset:144 +; CHECK-NEXT: buffer_load_dword v71, v2, s[0:3], 0 offen offset:140 +; CHECK-NEXT: buffer_load_dword v70, v2, s[0:3], 0 offen offset:136 +; CHECK-NEXT: buffer_load_dword v69, v2, s[0:3], 0 offen offset:132 +; CHECK-NEXT: buffer_load_dword v68, v2, s[0:3], 0 offen offset:128 +; CHECK-NEXT: buffer_load_dword v83, v2, s[0:3], 0 offen offset:92 +; CHECK-NEXT: buffer_load_dword v82, v2, s[0:3], 0 offen offset:88 +; CHECK-NEXT: buffer_load_dword v81, v2, s[0:3], 0 offen offset:84 +; CHECK-NEXT: buffer_load_dword v80, v2, s[0:3], 0 offen offset:80 +; CHECK-NEXT: buffer_load_dword v87, v2, s[0:3], 0 offen offset:76 +; CHECK-NEXT: buffer_load_dword v86, v2, s[0:3], 0 offen offset:72 +; CHECK-NEXT: buffer_load_dword v85, v2, s[0:3], 0 offen offset:68 +; CHECK-NEXT: buffer_load_dword v84, v2, s[0:3], 0 offen offset:64 +; CHECK-NEXT: buffer_load_dword v96, v2, s[0:3], 0 offen +; CHECK-NEXT: buffer_load_dword v97, v2, s[0:3], 0 offen offset:4 +; CHECK-NEXT: buffer_load_dword v98, v2, s[0:3], 0 offen offset:8 +; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16 +; CHECK-NEXT: buffer_load_dword v99, v2, s[0:3], 0 offen offset:12 +; CHECK-NEXT: v_add_co_u32 v100, vcc_lo, v0, s4 +; CHECK-NEXT: v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo +; CHECK-NEXT: v_add_nc_u32_e32 v2, 0xffffff00, v2 +; CHECK-NEXT: s_add_u32 s4, s4, 0xffffff00 +; CHECK-NEXT: s_addc_u32 s5, s5, -1 +; CHECK-NEXT: s_waitcnt vmcnt(41) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[23:26] offset:240 +; CHECK-NEXT: s_waitcnt vmcnt(37) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[27:30] offset:224 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[31:34] offset:208 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], 
v[35:38] offset:192 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[48:51] offset:176 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[52:55] offset:160 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[64:67] offset:144 +; CHECK-NEXT: s_waitcnt vmcnt(13) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[68:71] offset:128 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[15:18] offset:112 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[19:22] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(9) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[80:83] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[84:87] offset:64 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[11:14] offset:48 +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[7:10] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[3:6] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_dwordx4 v[100:101], v[96:99] +; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; CHECK-NEXT: s_cbranch_scc0 .LBB9_4 +; CHECK-NEXT: .LBB9_5: ; %Flow11 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +; +; ALIGNED-LABEL: memmove_p0_p5_sz2048: +; ALIGNED: ; %bb.0: ; %entry +; ALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_store_dword v46, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v72, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v73, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v74, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v75, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v79, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v89, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
buffer_store_dword v92, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v93, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v105, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v107, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v108, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v109, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v111, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v120, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v121, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v122, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1] +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:708 ; 
4-byte Folded Spill +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0 +; ALIGNED-NEXT: s_mov_b32 s6, exec_lo +; ALIGNED-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo +; ALIGNED-NEXT: v_cmpx_ge_u32_e64 v2, v0 +; ALIGNED-NEXT: s_xor_b32 s6, exec_lo, s6 +; ALIGNED-NEXT: s_cbranch_execz .LBB9_2 +; ALIGNED-NEXT: .LBB9_1: ; %memmove_fwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: s_clause 0x39 +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:24 +; ALIGNED-NEXT: buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: buffer_load_ubyte v12, v2, s[0:3], 0 offen offset:26 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: buffer_load_ubyte v14, v2, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: buffer_load_ubyte v15, v2, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: buffer_load_ubyte v17, v2, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: buffer_load_ubyte v19, v2, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: buffer_load_ubyte v13, v2, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: buffer_load_ubyte v16, v2, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: buffer_load_ubyte v18, v2, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: buffer_load_ubyte v20, v2, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: buffer_load_ubyte v22, v2, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: buffer_load_ubyte v23, v2, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: buffer_load_ubyte v25, v2, s[0:3], 0 offen 
offset:42 +; ALIGNED-NEXT: buffer_load_ubyte v27, v2, s[0:3], 0 offen offset:43 +; ALIGNED-NEXT: buffer_load_ubyte v21, v2, s[0:3], 0 offen offset:44 +; ALIGNED-NEXT: buffer_load_ubyte v24, v2, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: buffer_load_ubyte v26, v2, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: buffer_load_ubyte v28, v2, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: buffer_load_ubyte v29, v2, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: buffer_load_ubyte v30, v2, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: buffer_load_ubyte v34, v2, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: buffer_load_ubyte v32, v2, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: buffer_load_ubyte v31, v2, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: buffer_load_ubyte v36, v2, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: buffer_load_ubyte v33, v2, s[0:3], 0 offen offset:54 +; ALIGNED-NEXT: buffer_load_ubyte v35, v2, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: buffer_load_ubyte v49, v2, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: buffer_load_ubyte v50, v2, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: buffer_load_ubyte v52, v2, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: buffer_load_ubyte v38, v2, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: buffer_load_ubyte v39, v2, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: buffer_load_ubyte v53, v2, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: buffer_load_ubyte v54, v2, s[0:3], 0 offen offset:65 +; ALIGNED-NEXT: buffer_load_ubyte v65, v2, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: buffer_load_ubyte v48, v2, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: buffer_load_ubyte v37, v2, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: buffer_load_ubyte v51, v2, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: buffer_load_ubyte v55, v2, s[0:3], 0 offen offset:67 +; ALIGNED-NEXT: buffer_load_ubyte v64, v2, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: buffer_load_ubyte v66, v2, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: buffer_load_ubyte v67, v2, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: 
buffer_load_ubyte v68, v2, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: buffer_load_ubyte v69, v2, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: buffer_load_ubyte v70, v2, s[0:3], 0 offen offset:77 +; ALIGNED-NEXT: buffer_load_ubyte v71, v2, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: buffer_load_ubyte v80, v2, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: buffer_load_ubyte v127, v2, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: buffer_load_ubyte v81, v2, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: s_waitcnt vmcnt(57) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(56) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:716 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(55) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:720 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(54) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:724 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(53) +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:732 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(52) +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:748 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(51) +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:756 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(50) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:740 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(49) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:744 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(48) +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:768 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: s_waitcnt vmcnt(45) +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:736 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt 
vmcnt(44) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:728 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(43) +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:752 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v3, v9, 8, v5 +; ALIGNED-NEXT: s_waitcnt vmcnt(41) +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:760 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v4, v8, 8, v6 +; ALIGNED-NEXT: v_lshl_or_b32 v5, v10, 8, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v6, v11, 8, v12 +; ALIGNED-NEXT: v_lshl_or_b32 v7, v15, 8, v14 +; ALIGNED-NEXT: v_lshl_or_b32 v8, v19, 8, v17 +; ALIGNED-NEXT: s_waitcnt vmcnt(40) +; ALIGNED-NEXT: v_lshl_or_b32 v9, v16, 8, v13 +; ALIGNED-NEXT: s_waitcnt vmcnt(38) +; ALIGNED-NEXT: v_lshl_or_b32 v10, v20, 8, v18 +; ALIGNED-NEXT: s_waitcnt vmcnt(36) +; ALIGNED-NEXT: v_lshl_or_b32 v11, v23, 8, v22 +; ALIGNED-NEXT: s_waitcnt vmcnt(34) +; ALIGNED-NEXT: v_lshl_or_b32 v12, v27, 8, v25 +; ALIGNED-NEXT: s_waitcnt vmcnt(32) +; ALIGNED-NEXT: v_lshl_or_b32 v13, v24, 8, v21 +; ALIGNED-NEXT: s_waitcnt vmcnt(30) +; ALIGNED-NEXT: v_lshl_or_b32 v14, v28, 8, v26 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 16, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v6, 16, v5 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v8, 16, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v5, v10, 16, v9 +; ALIGNED-NEXT: v_lshl_or_b32 v6, v12, 16, v11 +; ALIGNED-NEXT: v_lshl_or_b32 v7, v14, 16, v13 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:772 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(28) +; ALIGNED-NEXT: v_lshl_or_b32 v15, v30, 8, v29 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(26) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v32, 8, v34 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:832 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(24) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v36, 8, v31 +; 
ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:840 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(22) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v35, 8, v33 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:852 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:868 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(12) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v48, 8, v37 +; ALIGNED-NEXT: v_lshl_or_b32 v5, v39, 8, v38 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:876 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v6, v50, 8, v49 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:880 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(11) +; ALIGNED-NEXT: v_lshl_or_b32 v7, v51, 8, v52 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v0, 16, v15 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 16, v1 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v5, 16, v4 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v7, 16, v6 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v54, 8, v53 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:924 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(11) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v55, 8, v65 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:932 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(9) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v66, 8, v64 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:948 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v68, 8, v67 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:784 ; 4-byte 
Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:972 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v70, 8, v69 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:984 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v80, 8, v71 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:788 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:764 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:776 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:780 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:800 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:804 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:816 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:996 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:820 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:792 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:796 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:808 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:812 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 
offset:828 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:836 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:856 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:848 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:844 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:872 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:860 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:864 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:888 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:900 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:896 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:884 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:892 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:904 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:912 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:908 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:920 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:928 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v65, off, s[0:3], s32 offset:944 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:936 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v64, off, s[0:3], s32 offset:940 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:952 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], 
s32 offset:956 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:960 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:964 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v70, off, s[0:3], s32 offset:968 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v71, off, s[0:3], s32 offset:976 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v80, off, s[0:3], s32 offset:980 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:1000 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 offset:1404 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1036 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1024 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1020 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1004 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:992 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v81, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], 
s32 offset:1008 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1040 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1016 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1116 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1120 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt 
vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1060 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1064 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1056 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1080 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:89 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1076 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1084 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1112 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1072 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1104 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: 
buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1100 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1108 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1096 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1196 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; 
ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1200 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1140 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1144 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1136 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1160 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1164 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1192 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1152 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:116 +; ALIGNED-NEXT: buffer_store_dword 
v7, off, s[0:3], s32 offset:1188 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1180 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1184 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1176 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:125 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: 
buffer_store_dword v6, off, s[0:3], s32 offset:1280 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1288 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1220 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1224 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1216 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1240 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:133 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1232 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1244 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1276 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1236 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen 
offset:131 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1272 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1248 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1264 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1268 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1256 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1252 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1312 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v3 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:142 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1328 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:140 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1336 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; 
ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1340 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1332 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1324 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:1364 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1352 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1376 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1356 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1344 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v4, 8, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1384 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1396 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x3 +; ALIGNED-NEXT: buffer_load_ubyte v124, v2, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: buffer_load_ubyte v111, v2, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: buffer_load_ubyte v120, v2, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:148 +; 
ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v124, 8, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v120, 8, v111 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1400 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v121, v2, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: buffer_load_ubyte v122, v2, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_load_ubyte v109, v2, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1408 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v121, 8, v3 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v109, 8, v122 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1412 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v108, v2, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_load_ubyte v105, v2, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_load_ubyte v107, v2, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_load_ubyte v104, v2, s[0:3], 0 offen offset:159 +; ALIGNED-NEXT: buffer_load_ubyte v95, v2, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v105, 8, v108 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v104, 8, v107 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1416 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v93, v2, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_load_ubyte v92, v2, s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_load_ubyte v90, v2, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v92, 8, 
v93 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v95, 8, v90 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1420 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v89, v2, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_load_ubyte v79, v2, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_load_ubyte v73, v2, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_load_ubyte v74, v2, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_load_ubyte v88, v2, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_load_ubyte v75, v2, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_load_ubyte v76, v2, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_load_ubyte v72, v2, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v79, 8, v89 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v74, 8, v73 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v72, 8, v76 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1424 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v75, 8, v88 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1428 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v63, v2, s[0:3], 0 offen offset:172 +; ALIGNED-NEXT: buffer_load_ubyte v61, v2, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: buffer_load_ubyte v62, v2, s[0:3], 0 offen offset:174 +; ALIGNED-NEXT: buffer_load_ubyte v60, v2, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_load_ubyte v58, v2, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v61, 8, v63 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v60, 8, v62 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; 
ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1432 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v59, v2, s[0:3], 0 offen offset:168 +; ALIGNED-NEXT: buffer_load_ubyte v57, v2, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_load_ubyte v56, v2, s[0:3], 0 offen offset:170 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v57, 8, v59 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v58, 8, v56 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1436 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v47, v2, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_load_ubyte v45, v2, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_load_ubyte v41, v2, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_load_ubyte v42, v2, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_load_ubyte v46, v2, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_load_ubyte v43, v2, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_load_ubyte v44, v2, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_load_ubyte v40, v2, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v45, 8, v47 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v42, 8, v41 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v40, 8, v44 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1440 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v43, 8, v46 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1444 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v119, v2, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_load_ubyte v117, v2, s[0:3], 0 offen offset:189 +; ALIGNED-NEXT: 
buffer_load_ubyte v118, v2, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_load_ubyte v116, v2, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_load_ubyte v114, v2, s[0:3], 0 offen offset:187 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v117, 8, v119 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v116, 8, v118 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1448 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v115, v2, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_load_ubyte v113, v2, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: buffer_load_ubyte v112, v2, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v113, 8, v115 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v114, 8, v112 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1452 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v102, v2, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_load_ubyte v100, v2, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_load_ubyte v97, v2, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_load_ubyte v96, v2, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_load_ubyte v101, v2, s[0:3], 0 offen offset:196 +; ALIGNED-NEXT: buffer_load_ubyte v99, v2, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_load_ubyte v98, v2, s[0:3], 0 offen offset:198 +; ALIGNED-NEXT: buffer_load_ubyte v87, v2, s[0:3], 0 offen offset:199 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v100, 8, v102 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v96, 8, v97 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v87, 8, v98 +; ALIGNED-NEXT: buffer_store_dword v0, 
off, s[0:3], s32 offset:1456 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v99, 8, v101 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1460 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v85, v2, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: buffer_load_ubyte v83, v2, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: buffer_load_ubyte v84, v2, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: buffer_load_ubyte v82, v2, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: buffer_load_ubyte v81, v2, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v83, 8, v85 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v82, 8, v84 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1464 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v80, v2, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: buffer_load_ubyte v70, v2, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: buffer_load_ubyte v69, v2, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v70, 8, v80 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v81, 8, v69 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1468 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v68, v2, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: buffer_load_ubyte v54, v2, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: buffer_load_ubyte v66, v2, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: buffer_load_ubyte v53, v2, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: buffer_load_ubyte v55, v2, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v54, 8, v68 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: 
v_lshl_or_b32 v4, v53, 8, v66 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1472 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v67, v2, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: buffer_load_ubyte v64, v2, s[0:3], 0 offen offset:217 +; ALIGNED-NEXT: buffer_load_ubyte v49, v2, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: buffer_load_ubyte v50, v2, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: buffer_load_ubyte v65, v2, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: buffer_load_ubyte v51, v2, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: buffer_load_ubyte v52, v2, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: buffer_load_ubyte v48, v2, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v64, 8, v67 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v50, 8, v49 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v51, 8, v65 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v48, 8, v52 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1476 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1480 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v39, v2, s[0:3], 0 offen offset:208 +; ALIGNED-NEXT: buffer_load_ubyte v37, v2, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: buffer_load_ubyte v38, v2, s[0:3], 0 offen offset:210 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v37, 8, v39 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v55, 8, v38 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1484 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v36, v2, s[0:3], 
0 offen offset:224 +; ALIGNED-NEXT: buffer_load_ubyte v34, v2, s[0:3], 0 offen offset:225 +; ALIGNED-NEXT: buffer_load_ubyte v31, v2, s[0:3], 0 offen offset:226 +; ALIGNED-NEXT: buffer_load_ubyte v30, v2, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: buffer_load_ubyte v35, v2, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: buffer_load_ubyte v33, v2, s[0:3], 0 offen offset:229 +; ALIGNED-NEXT: buffer_load_ubyte v32, v2, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: buffer_load_ubyte v29, v2, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v34, 8, v36 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v30, 8, v31 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v4, 16, v3 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v33, 8, v35 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v29, 8, v32 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1488 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x17 +; ALIGNED-NEXT: buffer_load_ubyte v28, v2, s[0:3], 0 offen offset:236 +; ALIGNED-NEXT: buffer_load_ubyte v27, v2, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: buffer_load_ubyte v26, v2, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: buffer_load_ubyte v25, v2, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: buffer_load_ubyte v23, v2, s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: buffer_load_ubyte v24, v2, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: buffer_load_ubyte v22, v2, s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: buffer_load_ubyte v21, v2, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: buffer_load_ubyte v20, v2, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: buffer_load_ubyte v18, v2, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: buffer_load_ubyte v15, v2, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: buffer_load_ubyte v14, v2, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: buffer_load_ubyte v19, v2, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: buffer_load_ubyte v17, v2, s[0:3], 0 offen 
offset:245 +; ALIGNED-NEXT: buffer_load_ubyte v16, v2, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: buffer_load_ubyte v13, v2, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: buffer_load_ubyte v12, v2, s[0:3], 0 offen offset:252 +; ALIGNED-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: v_lshl_or_b32 v110, v4, 16, v3 +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: buffer_load_ubyte v106, v2, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: buffer_load_ubyte v123, v2, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: buffer_load_ubyte v125, v2, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: s_waitcnt vmcnt(27) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v27, 8, v28 +; ALIGNED-NEXT: s_waitcnt vmcnt(25) +; ALIGNED-NEXT: v_lshl_or_b32 v4, v25, 8, v26 +; ALIGNED-NEXT: s_waitcnt vmcnt(13) +; ALIGNED-NEXT: v_lshl_or_b32 v77, v13, 8, v16 +; ALIGNED-NEXT: s_waitcnt vmcnt(9) +; ALIGNED-NEXT: v_lshl_or_b32 v91, v9, 8, v10 +; ALIGNED-NEXT: v_lshl_or_b32 v94, v4, 16, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v22, 8, v24 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v23, 8, v21 +; ALIGNED-NEXT: v_lshl_or_b32 v78, v4, 16, v3 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v18, 8, v20 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v14, 8, v15 +; ALIGNED-NEXT: v_lshl_or_b32 v103, v4, 16, v3 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v17, 8, v19 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v0, off, 
s[0:3], s32 offset:1292 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:1296 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:1304 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:1308 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v86, v77, 16, v4 +; ALIGNED-NEXT: v_lshl_or_b32 v77, v11, 8, v12 +; ALIGNED-NEXT: v_lshl_or_b32 v71, v91, 16, v77 +; ALIGNED-NEXT: v_lshl_or_b32 v77, v6, 8, v8 +; ALIGNED-NEXT: v_lshl_or_b32 v91, v7, 8, v5 +; ALIGNED-NEXT: v_lshl_or_b32 v4, v91, 16, v77 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v77, v2, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: buffer_load_ubyte v91, v2, s[0:3], 0 offen offset:2 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1260 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1320 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:1284 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:1300 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v77, v77, 8, v1 +; ALIGNED-NEXT: v_lshl_or_b32 v91, v0, 8, v91 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v91, 16, v77 +; ALIGNED-NEXT: v_lshl_or_b32 v77, v123, 8, v106 +; ALIGNED-NEXT: v_lshl_or_b32 v91, v3, 8, v125 +; ALIGNED-NEXT: buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:13 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1316 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v91, 16, v77 +; ALIGNED-NEXT: buffer_load_ubyte v91, v2, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], 
s32 offset:1348 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: buffer_load_ubyte v126, v2, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1360 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v77, v3, 8, v1 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1380 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:1372 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v91, v91, 8, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1368 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v91, 16, v77 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1388 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: buffer_load_ubyte v125, v2, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: buffer_load_ubyte v123, v2, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v77, v125, 8, v1 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v91, v126, 8, v123 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v91, 16, v77 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1392 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v106, v2, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: buffer_load_ubyte v77, v2, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: buffer_load_ubyte v91, v2, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:232 +; ALIGNED-NEXT: buffer_store_dword v71, off, s[0:3], s32 offset:236 +; ALIGNED-NEXT: buffer_store_dword v86, off, s[0:3], s32 offset:228 +; ALIGNED-NEXT: buffer_store_dword v103, off, s[0:3], 
s32 offset:224 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:704 +; ALIGNED-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:708 +; ALIGNED-NEXT: v_add_nc_u32_e32 v2, 0x100, v2 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v127, 8, v77 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v127, v91, 8, v106 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_add_co_u32 v3, vcc_lo, v3, s4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s5, v4, vcc_lo +; ALIGNED-NEXT: flat_store_byte v[3:4], v5 offset:250 +; ALIGNED-NEXT: flat_store_byte v[3:4], v7 offset:251 +; ALIGNED-NEXT: flat_store_byte v[3:4], v6 offset:249 +; ALIGNED-NEXT: flat_store_byte v[3:4], v9 offset:255 +; ALIGNED-NEXT: flat_store_byte v[3:4], v11 offset:253 +; ALIGNED-NEXT: flat_store_byte v[3:4], v10 offset:254 +; ALIGNED-NEXT: flat_store_byte v[3:4], v12 offset:252 +; ALIGNED-NEXT: flat_store_byte v[3:4], v8 offset:248 +; ALIGNED-NEXT: flat_store_byte v[3:4], v15 offset:242 +; ALIGNED-NEXT: flat_store_byte v[3:4], v14 offset:243 +; ALIGNED-NEXT: flat_store_byte v[3:4], v18 offset:241 +; ALIGNED-NEXT: flat_store_byte v[3:4], v13 offset:247 +; ALIGNED-NEXT: flat_store_byte v[3:4], v17 offset:245 +; ALIGNED-NEXT: flat_store_byte v[3:4], v16 offset:246 +; ALIGNED-NEXT: flat_store_byte v[3:4], v19 offset:244 +; ALIGNED-NEXT: flat_store_byte v[3:4], v20 offset:240 +; ALIGNED-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:248 +; ALIGNED-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:252 +; ALIGNED-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:244 +; ALIGNED-NEXT: v_lshl_or_b32 v127, v0, 16, v127 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1488 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_add_u32 s4, s4, 0x100 +; ALIGNED-NEXT: s_addc_u32 s5, s5, 0 +; ALIGNED-NEXT: s_cmp_lg_u64 s[4:5], 0x800 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 offset:240 +; ALIGNED-NEXT: flat_store_byte v[3:4], v21 offset:234 +; ALIGNED-NEXT: flat_store_byte v[3:4], v23 offset:235 +; ALIGNED-NEXT: flat_store_byte v[3:4], v22 offset:233 +; ALIGNED-NEXT: flat_store_byte v[3:4], v25 offset:239 +; ALIGNED-NEXT: flat_store_byte v[3:4], v27 offset:237 +; ALIGNED-NEXT: flat_store_byte v[3:4], v26 offset:238 +; ALIGNED-NEXT: flat_store_byte v[3:4], v28 offset:236 +; ALIGNED-NEXT: flat_store_byte v[3:4], v24 offset:232 +; ALIGNED-NEXT: flat_store_byte v[3:4], v31 offset:226 +; ALIGNED-NEXT: flat_store_byte v[3:4], v30 offset:227 +; ALIGNED-NEXT: flat_store_byte v[3:4], v34 offset:225 +; ALIGNED-NEXT: flat_store_byte v[3:4], v29 offset:231 +; ALIGNED-NEXT: flat_store_byte v[3:4], v33 offset:229 +; ALIGNED-NEXT: flat_store_byte v[3:4], v32 offset:230 +; ALIGNED-NEXT: flat_store_byte v[3:4], v35 offset:228 +; ALIGNED-NEXT: flat_store_byte v[3:4], v36 offset:224 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1484 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:192 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1480 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:204 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1476 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:200 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1472 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 +; ALIGNED-NEXT: flat_store_byte v[3:4], v54 offset:213 +; ALIGNED-NEXT: flat_store_byte v[3:4], v53 offset:215 +; ALIGNED-NEXT: flat_store_byte v[3:4], v37 offset:209 +; ALIGNED-NEXT: flat_store_byte v[3:4], v55 offset:211 +; ALIGNED-NEXT: flat_store_byte v[3:4], v38 
offset:210 +; ALIGNED-NEXT: flat_store_byte v[3:4], v66 offset:214 +; ALIGNED-NEXT: flat_store_byte v[3:4], v68 offset:212 +; ALIGNED-NEXT: flat_store_byte v[3:4], v49 offset:218 +; ALIGNED-NEXT: flat_store_byte v[3:4], v50 offset:219 +; ALIGNED-NEXT: flat_store_byte v[3:4], v64 offset:217 +; ALIGNED-NEXT: flat_store_byte v[3:4], v48 offset:223 +; ALIGNED-NEXT: flat_store_byte v[3:4], v51 offset:221 +; ALIGNED-NEXT: flat_store_byte v[3:4], v52 offset:222 +; ALIGNED-NEXT: flat_store_byte v[3:4], v65 offset:220 +; ALIGNED-NEXT: flat_store_byte v[3:4], v67 offset:216 +; ALIGNED-NEXT: flat_store_byte v[3:4], v39 offset:208 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1468 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:216 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1464 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:220 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1460 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:212 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1456 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:208 +; ALIGNED-NEXT: flat_store_byte v[3:4], v69 offset:202 +; ALIGNED-NEXT: flat_store_byte v[3:4], v81 offset:203 +; ALIGNED-NEXT: flat_store_byte v[3:4], v70 offset:201 +; ALIGNED-NEXT: flat_store_byte v[3:4], v82 offset:207 +; ALIGNED-NEXT: flat_store_byte v[3:4], v83 offset:205 +; ALIGNED-NEXT: flat_store_byte v[3:4], v84 offset:206 +; ALIGNED-NEXT: flat_store_byte v[3:4], v85 offset:204 +; ALIGNED-NEXT: flat_store_byte v[3:4], v80 offset:200 +; ALIGNED-NEXT: flat_store_byte v[3:4], v97 offset:194 +; ALIGNED-NEXT: flat_store_byte v[3:4], v96 offset:195 +; ALIGNED-NEXT: flat_store_byte 
v[3:4], v100 offset:193 +; ALIGNED-NEXT: flat_store_byte v[3:4], v87 offset:199 +; ALIGNED-NEXT: flat_store_byte v[3:4], v99 offset:197 +; ALIGNED-NEXT: flat_store_byte v[3:4], v98 offset:198 +; ALIGNED-NEXT: flat_store_byte v[3:4], v101 offset:196 +; ALIGNED-NEXT: flat_store_byte v[3:4], v102 offset:192 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1452 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:296 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1448 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:300 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1444 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:292 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1440 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:288 +; ALIGNED-NEXT: flat_store_byte v[3:4], v112 offset:186 +; ALIGNED-NEXT: flat_store_byte v[3:4], v114 offset:187 +; ALIGNED-NEXT: flat_store_byte v[3:4], v113 offset:185 +; ALIGNED-NEXT: flat_store_byte v[3:4], v116 offset:191 +; ALIGNED-NEXT: flat_store_byte v[3:4], v117 offset:189 +; ALIGNED-NEXT: flat_store_byte v[3:4], v118 offset:190 +; ALIGNED-NEXT: flat_store_byte v[3:4], v119 offset:188 +; ALIGNED-NEXT: flat_store_byte v[3:4], v115 offset:184 +; ALIGNED-NEXT: flat_store_byte v[3:4], v41 offset:178 +; ALIGNED-NEXT: flat_store_byte v[3:4], v42 offset:179 +; ALIGNED-NEXT: flat_store_byte v[3:4], v45 offset:177 +; ALIGNED-NEXT: flat_store_byte v[3:4], v40 offset:183 +; ALIGNED-NEXT: flat_store_byte v[3:4], v43 offset:181 +; ALIGNED-NEXT: flat_store_byte v[3:4], v44 offset:182 +; ALIGNED-NEXT: flat_store_byte v[3:4], v46 offset:180 +; ALIGNED-NEXT: flat_store_byte v[3:4], v47 offset:176 +; 
ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1436 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:312 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1432 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:316 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1428 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:308 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1424 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:304 +; ALIGNED-NEXT: flat_store_byte v[3:4], v56 offset:170 +; ALIGNED-NEXT: flat_store_byte v[3:4], v58 offset:171 +; ALIGNED-NEXT: flat_store_byte v[3:4], v57 offset:169 +; ALIGNED-NEXT: flat_store_byte v[3:4], v60 offset:175 +; ALIGNED-NEXT: flat_store_byte v[3:4], v61 offset:173 +; ALIGNED-NEXT: flat_store_byte v[3:4], v62 offset:174 +; ALIGNED-NEXT: flat_store_byte v[3:4], v63 offset:172 +; ALIGNED-NEXT: flat_store_byte v[3:4], v59 offset:168 +; ALIGNED-NEXT: flat_store_byte v[3:4], v73 offset:162 +; ALIGNED-NEXT: flat_store_byte v[3:4], v74 offset:163 +; ALIGNED-NEXT: flat_store_byte v[3:4], v79 offset:161 +; ALIGNED-NEXT: flat_store_byte v[3:4], v72 offset:167 +; ALIGNED-NEXT: flat_store_byte v[3:4], v75 offset:165 +; ALIGNED-NEXT: flat_store_byte v[3:4], v76 offset:166 +; ALIGNED-NEXT: flat_store_byte v[3:4], v88 offset:164 +; ALIGNED-NEXT: flat_store_byte v[3:4], v89 offset:160 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1420 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:264 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1416 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:268 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1412 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1408 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:256 +; ALIGNED-NEXT: flat_store_byte v[3:4], v90 offset:154 +; ALIGNED-NEXT: flat_store_byte v[3:4], v95 offset:155 +; ALIGNED-NEXT: flat_store_byte v[3:4], v92 offset:153 +; ALIGNED-NEXT: flat_store_byte v[3:4], v104 offset:159 +; ALIGNED-NEXT: flat_store_byte v[3:4], v105 offset:157 +; ALIGNED-NEXT: flat_store_byte v[3:4], v107 offset:158 +; ALIGNED-NEXT: flat_store_byte v[3:4], v108 offset:156 +; ALIGNED-NEXT: flat_store_byte v[3:4], v93 offset:152 +; ALIGNED-NEXT: flat_store_byte v[3:4], v111 offset:146 +; ALIGNED-NEXT: flat_store_byte v[3:4], v120 offset:147 +; ALIGNED-NEXT: flat_store_byte v[3:4], v124 offset:145 +; ALIGNED-NEXT: flat_store_byte v[3:4], v109 offset:151 +; ALIGNED-NEXT: flat_store_byte v[3:4], v121 offset:149 +; ALIGNED-NEXT: flat_store_byte v[3:4], v122 offset:150 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1400 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:148 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1396 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:144 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1384 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:280 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1352 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 
offset:284 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1328 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:276 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1312 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:272 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1376 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:138 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1364 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:139 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1356 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:137 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1340 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:143 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1332 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:141 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1336 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:142 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1324 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:140 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1344 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:136 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1272 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:130 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1264 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:131 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1256 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:129 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1288 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:135 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1276 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:133 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1280 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:134 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1268 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:132 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1252 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:128 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1248 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:360 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1232 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:364 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:356 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded 
Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:352 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1244 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:122 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1240 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:123 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1236 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:121 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1224 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:127 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1216 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:125 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:126 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:124 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:120 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1188 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:114 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1180 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:115 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1176 
; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:113 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1200 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:119 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1192 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:117 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1196 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:118 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1184 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:116 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:112 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:376 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:380 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:372 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:368 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1164 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:106 +; 
ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1160 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:107 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1152 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:105 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1144 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:111 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1136 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:109 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1140 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:110 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:108 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:104 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:98 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:99 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:97 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:103 +; 
ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:101 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:102 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:100 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:96 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:328 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:332 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:324 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:320 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:90 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:91 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:89 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:95 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:93 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:94 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:92 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:88 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:82 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:83 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:81 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:87 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:85 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:86 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:84 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:80 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:344 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:996 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:348 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:340 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:972 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:336 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:74 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:75 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:73 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:980 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:79 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:968 ; 4-byte Folded 
Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:77 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:976 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:78 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:964 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:76 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:72 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:944 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:66 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:936 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:67 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:928 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:65 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:960 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:71 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:952 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:69 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:956 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:70 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:940 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:68 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:920 ; 4-byte Folded Reload +; 
ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:64 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:948 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:424 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:932 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:428 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:924 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:420 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:416 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:896 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:61 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:912 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:58 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:908 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:59 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:904 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:57 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:900 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:63 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:888 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:62 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 
offset:884 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:60 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:892 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:56 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:872 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:53 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:856 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:50 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:848 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:51 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:836 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:49 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:864 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:55 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:860 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:54 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:844 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:52 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:828 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:48 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:880 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:444 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 
offset:876 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:440 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:868 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:436 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:852 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:432 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:820 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:43 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:816 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:42 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:804 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:41 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:800 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:40 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:812 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:47 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:808 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:46 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:796 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:45 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:792 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:44 +; ALIGNED-NEXT: 
buffer_load_dword v0, off, s[0:3], s32 offset:788 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:35 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:784 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:34 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:772 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:33 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:768 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:32 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:39 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:776 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:38 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:764 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:37 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:760 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:36 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:840 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:392 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:832 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:396 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 
offset:388 +; ALIGNED-NEXT: buffer_store_dword v127, off, s[0:3], s32 offset:384 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:756 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:26 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:752 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:27 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:748 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:25 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:744 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:31 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:736 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:29 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:740 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:30 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:728 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:28 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:732 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:24 +; ALIGNED-NEXT: flat_store_byte v[3:4], v77 offset:18 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1404 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:19 +; ALIGNED-NEXT: flat_store_byte v[3:4], v91 offset:17 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:724 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:23 +; 
ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:716 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:21 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:720 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:22 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:20 +; ALIGNED-NEXT: flat_store_byte v[3:4], v106 offset:16 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1392 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:408 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1388 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:412 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1348 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:404 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1316 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:400 +; ALIGNED-NEXT: flat_store_byte v[3:4], v123 offset:10 +; ALIGNED-NEXT: flat_store_byte v[3:4], v126 offset:11 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1380 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:13 +; ALIGNED-NEXT: flat_store_byte v[3:4], v125 offset:9 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1372 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:15 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1368 ; 4-byte Folded Reload +; 
ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:14 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1360 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:12 +; ALIGNED-NEXT: flat_store_byte v[3:4], v1 offset:8 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1300 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:2 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1292 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:3 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1284 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:1 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1320 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:7 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1304 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:5 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1308 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:6 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1296 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 offset:4 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1260 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[3:4], v0 +; ALIGNED-NEXT: s_cbranch_scc1 .LBB9_1 +; ALIGNED-NEXT: .LBB9_2: ; %Flow10 +; ALIGNED-NEXT: s_andn2_saveexec_b32 s8, s6 +; ALIGNED-NEXT: s_cbranch_execz .LBB9_5 +; ALIGNED-NEXT: ; %bb.3: ; %memmove_bwd_loop.preheader +; ALIGNED-NEXT: 
v_add_nc_u32_e32 v4, 0x700, v2 +; ALIGNED-NEXT: s_movk_i32 s6, 0xff00 +; ALIGNED-NEXT: s_mov_b64 s[4:5], 0x700 +; ALIGNED-NEXT: s_mov_b32 s7, -1 +; ALIGNED-NEXT: .LBB9_4: ; %memmove_bwd_loop +; ALIGNED-NEXT: ; =>This Inner Loop Header: Depth=1 +; ALIGNED-NEXT: s_clause 0x3a +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:20 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:21 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:22 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:23 +; ALIGNED-NEXT: buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:24 +; ALIGNED-NEXT: buffer_load_ubyte v10, v4, s[0:3], 0 offen offset:25 +; ALIGNED-NEXT: buffer_load_ubyte v12, v4, s[0:3], 0 offen offset:26 +; ALIGNED-NEXT: buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:30 +; ALIGNED-NEXT: buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:31 +; ALIGNED-NEXT: buffer_load_ubyte v14, v4, s[0:3], 0 offen offset:32 +; ALIGNED-NEXT: buffer_load_ubyte v15, v4, s[0:3], 0 offen offset:33 +; ALIGNED-NEXT: buffer_load_ubyte v17, v4, s[0:3], 0 offen offset:34 +; ALIGNED-NEXT: buffer_load_ubyte v9, v4, s[0:3], 0 offen offset:29 +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:28 +; ALIGNED-NEXT: buffer_load_ubyte v11, v4, s[0:3], 0 offen offset:27 +; ALIGNED-NEXT: buffer_load_ubyte v19, v4, s[0:3], 0 offen offset:35 +; ALIGNED-NEXT: buffer_load_ubyte v13, v4, s[0:3], 0 offen offset:36 +; ALIGNED-NEXT: buffer_load_ubyte v16, v4, s[0:3], 0 offen offset:37 +; ALIGNED-NEXT: buffer_load_ubyte v18, v4, s[0:3], 0 offen offset:38 +; ALIGNED-NEXT: buffer_load_ubyte v20, v4, s[0:3], 0 offen offset:39 +; ALIGNED-NEXT: buffer_load_ubyte v22, v4, s[0:3], 0 offen offset:40 +; ALIGNED-NEXT: buffer_load_ubyte v23, v4, s[0:3], 0 offen offset:41 +; ALIGNED-NEXT: buffer_load_ubyte v25, v4, s[0:3], 0 offen offset:42 +; ALIGNED-NEXT: buffer_load_ubyte v28, v4, s[0:3], 0 offen offset:43 +; ALIGNED-NEXT: buffer_load_ubyte v21, v4, s[0:3], 0 
offen offset:44 +; ALIGNED-NEXT: buffer_load_ubyte v24, v4, s[0:3], 0 offen offset:45 +; ALIGNED-NEXT: buffer_load_ubyte v26, v4, s[0:3], 0 offen offset:46 +; ALIGNED-NEXT: buffer_load_ubyte v27, v4, s[0:3], 0 offen offset:47 +; ALIGNED-NEXT: buffer_load_ubyte v30, v4, s[0:3], 0 offen offset:48 +; ALIGNED-NEXT: buffer_load_ubyte v31, v4, s[0:3], 0 offen offset:49 +; ALIGNED-NEXT: buffer_load_ubyte v33, v4, s[0:3], 0 offen offset:50 +; ALIGNED-NEXT: buffer_load_ubyte v34, v4, s[0:3], 0 offen offset:51 +; ALIGNED-NEXT: buffer_load_ubyte v32, v4, s[0:3], 0 offen offset:52 +; ALIGNED-NEXT: buffer_load_ubyte v37, v4, s[0:3], 0 offen offset:53 +; ALIGNED-NEXT: buffer_load_ubyte v35, v4, s[0:3], 0 offen offset:54 +; ALIGNED-NEXT: buffer_load_ubyte v36, v4, s[0:3], 0 offen offset:55 +; ALIGNED-NEXT: buffer_load_ubyte v48, v4, s[0:3], 0 offen offset:56 +; ALIGNED-NEXT: buffer_load_ubyte v51, v4, s[0:3], 0 offen offset:57 +; ALIGNED-NEXT: buffer_load_ubyte v52, v4, s[0:3], 0 offen offset:58 +; ALIGNED-NEXT: buffer_load_ubyte v38, v4, s[0:3], 0 offen offset:60 +; ALIGNED-NEXT: buffer_load_ubyte v50, v4, s[0:3], 0 offen offset:61 +; ALIGNED-NEXT: buffer_load_ubyte v39, v4, s[0:3], 0 offen offset:62 +; ALIGNED-NEXT: buffer_load_ubyte v49, v4, s[0:3], 0 offen offset:63 +; ALIGNED-NEXT: buffer_load_ubyte v29, v4, s[0:3], 0 offen offset:64 +; ALIGNED-NEXT: buffer_load_ubyte v55, v4, s[0:3], 0 offen offset:65 +; ALIGNED-NEXT: buffer_load_ubyte v66, v4, s[0:3], 0 offen offset:66 +; ALIGNED-NEXT: buffer_load_ubyte v53, v4, s[0:3], 0 offen offset:59 +; ALIGNED-NEXT: buffer_load_ubyte v67, v4, s[0:3], 0 offen offset:67 +; ALIGNED-NEXT: buffer_load_ubyte v54, v4, s[0:3], 0 offen offset:68 +; ALIGNED-NEXT: buffer_load_ubyte v64, v4, s[0:3], 0 offen offset:69 +; ALIGNED-NEXT: buffer_load_ubyte v65, v4, s[0:3], 0 offen offset:70 +; ALIGNED-NEXT: buffer_load_ubyte v68, v4, s[0:3], 0 offen offset:71 +; ALIGNED-NEXT: buffer_load_ubyte v69, v4, s[0:3], 0 offen offset:76 +; ALIGNED-NEXT: 
buffer_load_ubyte v70, v4, s[0:3], 0 offen offset:77 +; ALIGNED-NEXT: buffer_load_ubyte v71, v4, s[0:3], 0 offen offset:78 +; ALIGNED-NEXT: buffer_load_ubyte v80, v4, s[0:3], 0 offen offset:79 +; ALIGNED-NEXT: buffer_load_ubyte v126, v4, s[0:3], 0 offen offset:19 +; ALIGNED-NEXT: buffer_load_ubyte v81, v4, s[0:3], 0 offen offset:75 +; ALIGNED-NEXT: buffer_load_ubyte v125, v4, s[0:3], 0 offen offset:151 +; ALIGNED-NEXT: s_waitcnt vmcnt(58) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(57) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:716 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(56) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:720 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(55) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:724 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(54) +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:732 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(53) +; ALIGNED-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:748 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(52) +; ALIGNED-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:756 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(51) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:740 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(50) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:744 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(49) +; ALIGNED-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:768 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2 +; ALIGNED-NEXT: s_waitcnt vmcnt(46) +; ALIGNED-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:736 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(45) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 
offset:728 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(44) +; ALIGNED-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:752 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v2, v9, 8, v5 +; ALIGNED-NEXT: s_waitcnt vmcnt(42) +; ALIGNED-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:760 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v3, v8, 8, v6 +; ALIGNED-NEXT: v_lshl_or_b32 v5, v10, 8, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v6, v11, 8, v12 +; ALIGNED-NEXT: v_lshl_or_b32 v7, v15, 8, v14 +; ALIGNED-NEXT: v_lshl_or_b32 v8, v19, 8, v17 +; ALIGNED-NEXT: s_waitcnt vmcnt(41) +; ALIGNED-NEXT: v_lshl_or_b32 v9, v16, 8, v13 +; ALIGNED-NEXT: s_waitcnt vmcnt(39) +; ALIGNED-NEXT: v_lshl_or_b32 v10, v20, 8, v18 +; ALIGNED-NEXT: s_waitcnt vmcnt(37) +; ALIGNED-NEXT: v_lshl_or_b32 v11, v23, 8, v22 +; ALIGNED-NEXT: s_waitcnt vmcnt(35) +; ALIGNED-NEXT: v_lshl_or_b32 v12, v28, 8, v25 +; ALIGNED-NEXT: s_waitcnt vmcnt(33) +; ALIGNED-NEXT: v_lshl_or_b32 v13, v24, 8, v21 +; ALIGNED-NEXT: s_waitcnt vmcnt(31) +; ALIGNED-NEXT: v_lshl_or_b32 v14, v27, 8, v26 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 16, v2 +; ALIGNED-NEXT: v_lshl_or_b32 v2, v6, 16, v5 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v8, 16, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v5, v10, 16, v9 +; ALIGNED-NEXT: v_lshl_or_b32 v6, v12, 16, v11 +; ALIGNED-NEXT: v_lshl_or_b32 v7, v14, 16, v13 +; ALIGNED-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:772 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(29) +; ALIGNED-NEXT: v_lshl_or_b32 v15, v31, 8, v30 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(27) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v34, 8, v33 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:796 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(25) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v37, 8, v32 +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:800 ; 
4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(23) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v36, 8, v35 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:840 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(18) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v50, 8, v38 +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:852 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(16) +; ALIGNED-NEXT: v_lshl_or_b32 v5, v49, 8, v39 +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:868 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v6, v51, 8, v48 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:880 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(12) +; ALIGNED-NEXT: v_lshl_or_b32 v7, v53, 8, v52 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v0, 16, v15 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v2, 16, v1 +; ALIGNED-NEXT: v_lshl_or_b32 v2, v5, 16, v3 +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:85 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v7, 16, v6 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v55, 8, v29 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:920 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(12) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v67, 8, v66 +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:928 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(10) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v64, 8, v54 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:932 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(8) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v68, 8, v65 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:86 +; ALIGNED-NEXT: buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:82 +; ALIGNED-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:788 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
v_lshl_or_b32 v1, v3, 16, v2 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:976 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(8) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v70, 8, v69 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:83 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:74 +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:988 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(8) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v80, 8, v71 +; ALIGNED-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:792 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:764 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:776 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:784 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:73 +; ALIGNED-NEXT: buffer_store_dword v22, off, s[0:3], s32 offset:812 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v23, off, s[0:3], s32 offset:816 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v25, off, s[0:3], s32 offset:828 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:72 +; ALIGNED-NEXT: buffer_store_dword v28, off, s[0:3], s32 offset:832 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v21, off, s[0:3], s32 offset:804 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v24, off, s[0:3], s32 offset:808 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v26, off, s[0:3], s32 offset:820 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v27, off, s[0:3], s32 offset:824 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:836 ; 4-byte Folded Spill 
+; ALIGNED-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:844 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:860 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:856 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:848 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v37, off, s[0:3], s32 offset:876 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v35, off, s[0:3], s32 offset:864 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v36, off, s[0:3], s32 offset:872 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v38, off, s[0:3], s32 offset:884 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v50, off, s[0:3], s32 offset:900 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v39, off, s[0:3], s32 offset:888 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v49, off, s[0:3], s32 offset:896 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v48, off, s[0:3], s32 offset:892 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v51, off, s[0:3], s32 offset:904 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v52, off, s[0:3], s32 offset:912 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v53, off, s[0:3], s32 offset:908 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v29, off, s[0:3], s32 offset:924 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v55, off, s[0:3], s32 offset:936 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v66, off, s[0:3], s32 offset:956 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v67, off, s[0:3], s32 offset:948 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v54, off, s[0:3], s32 offset:940 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v64, off, s[0:3], s32 offset:944 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v65, off, s[0:3], s32 offset:952 ; 4-byte Folded 
Spill +; ALIGNED-NEXT: buffer_store_dword v68, off, s[0:3], s32 offset:960 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v69, off, s[0:3], s32 offset:964 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v70, off, s[0:3], s32 offset:968 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v71, off, s[0:3], s32 offset:972 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_store_dword v80, off, s[0:3], s32 offset:980 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(9) +; ALIGNED-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:1416 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(8) +; ALIGNED-NEXT: buffer_store_dword v81, off, s[0:3], s32 offset:1000 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:87 +; ALIGNED-NEXT: s_waitcnt vmcnt(7) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1036 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1020 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1004 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:996 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v81, 8, v2 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:84 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:81 +; ALIGNED-NEXT: buffer_store_dword v0, off, 
s[0:3], s32 offset:1008 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:80 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1040 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1024 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1016 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:98 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:102 +; ALIGNED-NEXT: buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:103 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v2 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:94 +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:95 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:93 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:91 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:92 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1116 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1120 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1060 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1064 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1056 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v2 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:90 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1080 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:101 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:89 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:88 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1084 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1112 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1076 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:99 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:100 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1108 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; 
ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:97 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:96 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1100 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1104 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1096 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:114 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:118 +; ALIGNED-NEXT: buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:119 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v2 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:110 +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:111 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:109 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:107 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:108 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1196 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt 
vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1200 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1140 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1144 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1136 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v2 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:106 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1160 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:117 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:105 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1152 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:104 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1164 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1192 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1156 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:115 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:116 +; ALIGNED-NEXT: 
buffer_store_dword v7, off, s[0:3], s32 offset:1188 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:113 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:112 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1180 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1184 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1176 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:130 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:134 +; ALIGNED-NEXT: buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:135 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v2 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:126 +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:127 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:125 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:123 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:124 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) 
+; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1276 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:1280 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1220 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1224 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1216 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v2 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:122 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1240 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:133 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:121 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1232 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:120 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1244 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1272 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1236 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, 
s[0:3], 0 offen offset:131 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:132 +; ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1268 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:129 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1248 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:128 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1260 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1264 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1256 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1252 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v7 +; ALIGNED-NEXT: buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:146 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v8, 8, v6 +; ALIGNED-NEXT: buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:150 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1284 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v2 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:142 +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:143 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:141 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:139 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1292 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:140 +; ALIGNED-NEXT: 
s_waitcnt vmcnt(5) +; ALIGNED-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:1372 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1300 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1304 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1296 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1288 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v5, 8, v2 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:138 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1320 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:149 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:137 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1312 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:136 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1324 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:1368 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1316 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1308 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:147 +; ALIGNED-NEXT: buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:148 +; 
ALIGNED-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:1356 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:145 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1328 ; 4-byte Folded Spill +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:144 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:1348 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:1352 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:1340 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1332 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 8, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v7 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: v_lshl_or_b32 v1, v125, 8, v6 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1384 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v5, 8, v2 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1392 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v122, v4, s[0:3], 0 offen offset:156 +; ALIGNED-NEXT: buffer_load_ubyte v111, v4, s[0:3], 0 offen offset:157 +; ALIGNED-NEXT: buffer_load_ubyte v120, v4, s[0:3], 0 offen offset:158 +; ALIGNED-NEXT: buffer_load_ubyte v109, v4, s[0:3], 0 offen offset:159 +; ALIGNED-NEXT: buffer_load_ubyte v106, v4, s[0:3], 0 offen offset:155 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v111, 8, v122 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v109, 8, v120 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 
offset:1400 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v105, v4, s[0:3], 0 offen offset:152 +; ALIGNED-NEXT: buffer_load_ubyte v94, v4, s[0:3], 0 offen offset:153 +; ALIGNED-NEXT: buffer_load_ubyte v92, v4, s[0:3], 0 offen offset:154 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v94, 8, v105 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v106, 8, v92 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1408 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v89, v4, s[0:3], 0 offen offset:160 +; ALIGNED-NEXT: buffer_load_ubyte v79, v4, s[0:3], 0 offen offset:161 +; ALIGNED-NEXT: buffer_load_ubyte v73, v4, s[0:3], 0 offen offset:162 +; ALIGNED-NEXT: buffer_load_ubyte v74, v4, s[0:3], 0 offen offset:163 +; ALIGNED-NEXT: buffer_load_ubyte v88, v4, s[0:3], 0 offen offset:164 +; ALIGNED-NEXT: buffer_load_ubyte v75, v4, s[0:3], 0 offen offset:165 +; ALIGNED-NEXT: buffer_load_ubyte v77, v4, s[0:3], 0 offen offset:166 +; ALIGNED-NEXT: buffer_load_ubyte v72, v4, s[0:3], 0 offen offset:167 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v79, 8, v89 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v74, 8, v73 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v72, 8, v77 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1420 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v75, 8, v88 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1424 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v63, v4, s[0:3], 0 offen offset:172 +; ALIGNED-NEXT: buffer_load_ubyte v61, v4, s[0:3], 0 offen offset:173 +; ALIGNED-NEXT: buffer_load_ubyte v62, v4, s[0:3], 0 offen offset:174 +; 
ALIGNED-NEXT: buffer_load_ubyte v60, v4, s[0:3], 0 offen offset:175 +; ALIGNED-NEXT: buffer_load_ubyte v58, v4, s[0:3], 0 offen offset:171 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v61, 8, v63 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v60, 8, v62 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1428 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v59, v4, s[0:3], 0 offen offset:168 +; ALIGNED-NEXT: buffer_load_ubyte v56, v4, s[0:3], 0 offen offset:169 +; ALIGNED-NEXT: buffer_load_ubyte v47, v4, s[0:3], 0 offen offset:170 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v56, 8, v59 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v58, 8, v47 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1432 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v46, v4, s[0:3], 0 offen offset:176 +; ALIGNED-NEXT: buffer_load_ubyte v44, v4, s[0:3], 0 offen offset:177 +; ALIGNED-NEXT: buffer_load_ubyte v119, v4, s[0:3], 0 offen offset:178 +; ALIGNED-NEXT: buffer_load_ubyte v40, v4, s[0:3], 0 offen offset:179 +; ALIGNED-NEXT: buffer_load_ubyte v45, v4, s[0:3], 0 offen offset:180 +; ALIGNED-NEXT: buffer_load_ubyte v41, v4, s[0:3], 0 offen offset:181 +; ALIGNED-NEXT: buffer_load_ubyte v42, v4, s[0:3], 0 offen offset:182 +; ALIGNED-NEXT: buffer_load_ubyte v118, v4, s[0:3], 0 offen offset:183 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v44, 8, v46 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v40, 8, v119 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v118, 8, v42 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1436 ; 4-byte Folded Spill +; ALIGNED-NEXT: 
v_lshl_or_b32 v0, v41, 8, v45 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1440 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v117, v4, s[0:3], 0 offen offset:188 +; ALIGNED-NEXT: buffer_load_ubyte v115, v4, s[0:3], 0 offen offset:189 +; ALIGNED-NEXT: buffer_load_ubyte v116, v4, s[0:3], 0 offen offset:190 +; ALIGNED-NEXT: buffer_load_ubyte v114, v4, s[0:3], 0 offen offset:191 +; ALIGNED-NEXT: buffer_load_ubyte v112, v4, s[0:3], 0 offen offset:187 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v115, 8, v117 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v114, 8, v116 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1444 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v113, v4, s[0:3], 0 offen offset:184 +; ALIGNED-NEXT: buffer_load_ubyte v103, v4, s[0:3], 0 offen offset:185 +; ALIGNED-NEXT: buffer_load_ubyte v102, v4, s[0:3], 0 offen offset:186 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v103, 8, v113 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v1, v112, 8, v102 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1448 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v100, v4, s[0:3], 0 offen offset:192 +; ALIGNED-NEXT: buffer_load_ubyte v98, v4, s[0:3], 0 offen offset:193 +; ALIGNED-NEXT: buffer_load_ubyte v87, v4, s[0:3], 0 offen offset:194 +; ALIGNED-NEXT: buffer_load_ubyte v86, v4, s[0:3], 0 offen offset:195 +; ALIGNED-NEXT: buffer_load_ubyte v99, v4, s[0:3], 0 offen offset:196 +; ALIGNED-NEXT: buffer_load_ubyte v97, v4, s[0:3], 0 offen offset:197 +; ALIGNED-NEXT: buffer_load_ubyte v96, v4, s[0:3], 0 offen offset:198 +; ALIGNED-NEXT: buffer_load_ubyte v85, v4, s[0:3], 0 
offen offset:199 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v98, 8, v100 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v86, 8, v87 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v97, 8, v99 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v85, 8, v96 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1452 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1456 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v83, v4, s[0:3], 0 offen offset:204 +; ALIGNED-NEXT: buffer_load_ubyte v81, v4, s[0:3], 0 offen offset:205 +; ALIGNED-NEXT: buffer_load_ubyte v82, v4, s[0:3], 0 offen offset:206 +; ALIGNED-NEXT: buffer_load_ubyte v80, v4, s[0:3], 0 offen offset:207 +; ALIGNED-NEXT: buffer_load_ubyte v71, v4, s[0:3], 0 offen offset:203 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v81, 8, v83 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v80, 8, v82 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1460 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v70, v4, s[0:3], 0 offen offset:200 +; ALIGNED-NEXT: buffer_load_ubyte v69, v4, s[0:3], 0 offen offset:201 +; ALIGNED-NEXT: buffer_load_ubyte v68, v4, s[0:3], 0 offen offset:202 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v69, 8, v70 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v71, 8, v68 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1464 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v67, v4, s[0:3], 0 offen offset:212 +; ALIGNED-NEXT: buffer_load_ubyte v54, 
v4, s[0:3], 0 offen offset:213 +; ALIGNED-NEXT: buffer_load_ubyte v65, v4, s[0:3], 0 offen offset:214 +; ALIGNED-NEXT: buffer_load_ubyte v52, v4, s[0:3], 0 offen offset:215 +; ALIGNED-NEXT: buffer_load_ubyte v55, v4, s[0:3], 0 offen offset:211 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v54, 8, v67 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v52, 8, v65 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1468 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v66, v4, s[0:3], 0 offen offset:216 +; ALIGNED-NEXT: buffer_load_ubyte v53, v4, s[0:3], 0 offen offset:217 +; ALIGNED-NEXT: buffer_load_ubyte v49, v4, s[0:3], 0 offen offset:218 +; ALIGNED-NEXT: buffer_load_ubyte v48, v4, s[0:3], 0 offen offset:219 +; ALIGNED-NEXT: buffer_load_ubyte v64, v4, s[0:3], 0 offen offset:220 +; ALIGNED-NEXT: buffer_load_ubyte v51, v4, s[0:3], 0 offen offset:221 +; ALIGNED-NEXT: buffer_load_ubyte v50, v4, s[0:3], 0 offen offset:222 +; ALIGNED-NEXT: buffer_load_ubyte v39, v4, s[0:3], 0 offen offset:223 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v53, 8, v66 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v48, 8, v49 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v51, 8, v64 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v39, 8, v50 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1472 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1476 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v38, v4, s[0:3], 0 offen offset:208 +; ALIGNED-NEXT: buffer_load_ubyte v36, v4, s[0:3], 0 offen offset:209 +; ALIGNED-NEXT: buffer_load_ubyte v37, v4, s[0:3], 0 offen offset:210 +; 
ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v36, 8, v38 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v55, 8, v37 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1480 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x7 +; ALIGNED-NEXT: buffer_load_ubyte v35, v4, s[0:3], 0 offen offset:224 +; ALIGNED-NEXT: buffer_load_ubyte v33, v4, s[0:3], 0 offen offset:225 +; ALIGNED-NEXT: buffer_load_ubyte v29, v4, s[0:3], 0 offen offset:226 +; ALIGNED-NEXT: buffer_load_ubyte v30, v4, s[0:3], 0 offen offset:227 +; ALIGNED-NEXT: buffer_load_ubyte v34, v4, s[0:3], 0 offen offset:228 +; ALIGNED-NEXT: buffer_load_ubyte v31, v4, s[0:3], 0 offen offset:229 +; ALIGNED-NEXT: buffer_load_ubyte v32, v4, s[0:3], 0 offen offset:230 +; ALIGNED-NEXT: buffer_load_ubyte v28, v4, s[0:3], 0 offen offset:231 +; ALIGNED-NEXT: s_waitcnt vmcnt(6) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v33, 8, v35 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v30, 8, v29 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v31, 8, v34 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v28, 8, v32 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1484 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x17 +; ALIGNED-NEXT: buffer_load_ubyte v27, v4, s[0:3], 0 offen offset:236 +; ALIGNED-NEXT: buffer_load_ubyte v25, v4, s[0:3], 0 offen offset:237 +; ALIGNED-NEXT: buffer_load_ubyte v26, v4, s[0:3], 0 offen offset:238 +; ALIGNED-NEXT: buffer_load_ubyte v24, v4, s[0:3], 0 offen offset:239 +; ALIGNED-NEXT: buffer_load_ubyte v23, v4, s[0:3], 0 offen offset:235 +; ALIGNED-NEXT: buffer_load_ubyte v22, v4, s[0:3], 0 offen offset:232 +; ALIGNED-NEXT: buffer_load_ubyte v21, v4, s[0:3], 0 offen offset:233 +; ALIGNED-NEXT: buffer_load_ubyte v20, v4, s[0:3], 0 offen offset:234 +; ALIGNED-NEXT: 
buffer_load_ubyte v19, v4, s[0:3], 0 offen offset:240 +; ALIGNED-NEXT: buffer_load_ubyte v17, v4, s[0:3], 0 offen offset:241 +; ALIGNED-NEXT: buffer_load_ubyte v13, v4, s[0:3], 0 offen offset:242 +; ALIGNED-NEXT: buffer_load_ubyte v14, v4, s[0:3], 0 offen offset:243 +; ALIGNED-NEXT: buffer_load_ubyte v18, v4, s[0:3], 0 offen offset:244 +; ALIGNED-NEXT: buffer_load_ubyte v15, v4, s[0:3], 0 offen offset:245 +; ALIGNED-NEXT: buffer_load_ubyte v16, v4, s[0:3], 0 offen offset:246 +; ALIGNED-NEXT: buffer_load_ubyte v12, v4, s[0:3], 0 offen offset:247 +; ALIGNED-NEXT: buffer_load_ubyte v11, v4, s[0:3], 0 offen offset:252 +; ALIGNED-NEXT: buffer_load_ubyte v9, v4, s[0:3], 0 offen offset:253 +; ALIGNED-NEXT: buffer_load_ubyte v10, v4, s[0:3], 0 offen offset:254 +; ALIGNED-NEXT: buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:255 +; ALIGNED-NEXT: buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:251 +; ALIGNED-NEXT: buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:248 +; ALIGNED-NEXT: buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:249 +; ALIGNED-NEXT: buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:250 +; ALIGNED-NEXT: v_lshl_or_b32 v123, v3, 16, v2 +; ALIGNED-NEXT: buffer_load_ubyte v0, v4, s[0:3], 0 offen +; ALIGNED-NEXT: s_waitcnt vmcnt(23) +; ALIGNED-NEXT: v_lshl_or_b32 v2, v25, 8, v27 +; ALIGNED-NEXT: s_waitcnt vmcnt(21) +; ALIGNED-NEXT: v_lshl_or_b32 v3, v24, 8, v26 +; ALIGNED-NEXT: s_waitcnt vmcnt(9) +; ALIGNED-NEXT: v_lshl_or_b32 v43, v12, 8, v16 +; ALIGNED-NEXT: s_waitcnt vmcnt(5) +; ALIGNED-NEXT: v_lshl_or_b32 v57, v8, 8, v10 +; ALIGNED-NEXT: v_lshl_or_b32 v104, v3, 16, v2 +; ALIGNED-NEXT: v_lshl_or_b32 v2, v21, 8, v22 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v23, 8, v20 +; ALIGNED-NEXT: v_lshl_or_b32 v76, v3, 16, v2 +; ALIGNED-NEXT: v_lshl_or_b32 v2, v17, 8, v19 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v14, 8, v13 +; ALIGNED-NEXT: v_lshl_or_b32 v101, v3, 16, v2 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v15, 8, v18 +; ALIGNED-NEXT: v_lshl_or_b32 v84, v43, 16, v3 +; ALIGNED-NEXT: 
v_lshl_or_b32 v43, v9, 8, v11 +; ALIGNED-NEXT: v_lshl_or_b32 v3, v57, 16, v43 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v43, v5, 8, v6 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v57, v7, 8, v1 +; ALIGNED-NEXT: v_lshl_or_b32 v2, v57, 16, v43 +; ALIGNED-NEXT: buffer_load_ubyte v43, v4, s[0:3], 0 offen offset:1 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1336 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:1344 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x5 +; ALIGNED-NEXT: buffer_load_ubyte v127, v4, s[0:3], 0 offen offset:2 +; ALIGNED-NEXT: buffer_load_ubyte v57, v4, s[0:3], 0 offen offset:3 +; ALIGNED-NEXT: buffer_load_ubyte v78, v4, s[0:3], 0 offen offset:4 +; ALIGNED-NEXT: buffer_load_ubyte v90, v4, s[0:3], 0 offen offset:5 +; ALIGNED-NEXT: buffer_load_ubyte v91, v4, s[0:3], 0 offen offset:6 +; ALIGNED-NEXT: buffer_load_ubyte v124, v4, s[0:3], 0 offen offset:7 +; ALIGNED-NEXT: v_lshl_or_b32 v43, v43, 8, v0 +; ALIGNED-NEXT: s_waitcnt vmcnt(4) +; ALIGNED-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:1360 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v57, v57, 8, v127 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:1364 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:1376 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:1380 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v57, 16, v43 +; ALIGNED-NEXT: v_lshl_or_b32 v43, v90, 8, v78 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v57, v124, 8, v91 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1388 ; 4-byte Folded Spill +; ALIGNED-NEXT: v_lshl_or_b32 v0, v57, 16, v43 +; 
ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1396 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x4 +; ALIGNED-NEXT: buffer_load_ubyte v121, v4, s[0:3], 0 offen offset:12 +; ALIGNED-NEXT: buffer_load_ubyte v107, v4, s[0:3], 0 offen offset:13 +; ALIGNED-NEXT: buffer_load_ubyte v110, v4, s[0:3], 0 offen offset:14 +; ALIGNED-NEXT: buffer_load_ubyte v108, v4, s[0:3], 0 offen offset:15 +; ALIGNED-NEXT: buffer_load_ubyte v93, v4, s[0:3], 0 offen offset:11 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v43, v107, 8, v121 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v57, v108, 8, v110 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v57, 16, v43 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1404 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v95, v4, s[0:3], 0 offen offset:8 +; ALIGNED-NEXT: buffer_load_ubyte v91, v4, s[0:3], 0 offen offset:9 +; ALIGNED-NEXT: buffer_load_ubyte v90, v4, s[0:3], 0 offen offset:10 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_lshl_or_b32 v43, v91, 8, v95 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_lshl_or_b32 v57, v93, 8, v90 +; ALIGNED-NEXT: v_lshl_or_b32 v0, v57, 16, v43 +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:1412 ; 4-byte Folded Spill +; ALIGNED-NEXT: s_clause 0x2 +; ALIGNED-NEXT: buffer_load_ubyte v78, v4, s[0:3], 0 offen offset:16 +; ALIGNED-NEXT: buffer_load_ubyte v43, v4, s[0:3], 0 offen offset:18 +; ALIGNED-NEXT: buffer_load_ubyte v57, v4, s[0:3], 0 offen offset:17 +; ALIGNED-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:488 +; ALIGNED-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:492 +; ALIGNED-NEXT: buffer_store_dword v84, off, s[0:3], s32 offset:484 +; ALIGNED-NEXT: buffer_store_dword v101, off, s[0:3], s32 offset:480 +; ALIGNED-NEXT: s_clause 0x1 +; ALIGNED-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:704 +; ALIGNED-NEXT: buffer_load_dword v3, off, s[0:3], s32 
offset:708 +; ALIGNED-NEXT: v_add_nc_u32_e32 v4, 0xffffff00, v4 +; ALIGNED-NEXT: s_waitcnt vmcnt(3) +; ALIGNED-NEXT: v_lshl_or_b32 v0, v126, 8, v43 +; ALIGNED-NEXT: s_waitcnt vmcnt(2) +; ALIGNED-NEXT: v_lshl_or_b32 v126, v57, 8, v78 +; ALIGNED-NEXT: s_waitcnt vmcnt(1) +; ALIGNED-NEXT: v_add_co_u32 v2, vcc_lo, v2, s4 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s5, v3, vcc_lo +; ALIGNED-NEXT: flat_store_byte v[2:3], v1 offset:250 +; ALIGNED-NEXT: flat_store_byte v[2:3], v7 offset:251 +; ALIGNED-NEXT: flat_store_byte v[2:3], v5 offset:249 +; ALIGNED-NEXT: flat_store_byte v[2:3], v8 offset:255 +; ALIGNED-NEXT: flat_store_byte v[2:3], v9 offset:253 +; ALIGNED-NEXT: flat_store_byte v[2:3], v10 offset:254 +; ALIGNED-NEXT: flat_store_byte v[2:3], v11 offset:252 +; ALIGNED-NEXT: flat_store_byte v[2:3], v6 offset:248 +; ALIGNED-NEXT: flat_store_byte v[2:3], v13 offset:242 +; ALIGNED-NEXT: flat_store_byte v[2:3], v14 offset:243 +; ALIGNED-NEXT: flat_store_byte v[2:3], v17 offset:241 +; ALIGNED-NEXT: flat_store_byte v[2:3], v12 offset:247 +; ALIGNED-NEXT: flat_store_byte v[2:3], v15 offset:245 +; ALIGNED-NEXT: flat_store_byte v[2:3], v16 offset:246 +; ALIGNED-NEXT: flat_store_byte v[2:3], v18 offset:244 +; ALIGNED-NEXT: flat_store_byte v[2:3], v19 offset:240 +; ALIGNED-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:504 +; ALIGNED-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:508 +; ALIGNED-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:500 +; ALIGNED-NEXT: v_lshl_or_b32 v126, v0, 16, v126 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1484 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_add_u32 s4, s4, 0xffffff00 +; ALIGNED-NEXT: s_addc_u32 s5, s5, -1 +; ALIGNED-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:496 +; ALIGNED-NEXT: flat_store_byte v[2:3], v20 offset:234 +; ALIGNED-NEXT: flat_store_byte v[2:3], v23 
offset:235 +; ALIGNED-NEXT: flat_store_byte v[2:3], v21 offset:233 +; ALIGNED-NEXT: flat_store_byte v[2:3], v24 offset:239 +; ALIGNED-NEXT: flat_store_byte v[2:3], v25 offset:237 +; ALIGNED-NEXT: flat_store_byte v[2:3], v26 offset:238 +; ALIGNED-NEXT: flat_store_byte v[2:3], v27 offset:236 +; ALIGNED-NEXT: flat_store_byte v[2:3], v22 offset:232 +; ALIGNED-NEXT: flat_store_byte v[2:3], v29 offset:226 +; ALIGNED-NEXT: flat_store_byte v[2:3], v30 offset:227 +; ALIGNED-NEXT: flat_store_byte v[2:3], v33 offset:225 +; ALIGNED-NEXT: flat_store_byte v[2:3], v28 offset:231 +; ALIGNED-NEXT: flat_store_byte v[2:3], v31 offset:229 +; ALIGNED-NEXT: flat_store_byte v[2:3], v32 offset:230 +; ALIGNED-NEXT: flat_store_byte v[2:3], v34 offset:228 +; ALIGNED-NEXT: flat_store_byte v[2:3], v35 offset:224 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1480 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:448 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1476 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:460 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1472 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:456 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1468 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:452 +; ALIGNED-NEXT: flat_store_byte v[2:3], v54 offset:213 +; ALIGNED-NEXT: flat_store_byte v[2:3], v52 offset:215 +; ALIGNED-NEXT: flat_store_byte v[2:3], v36 offset:209 +; ALIGNED-NEXT: flat_store_byte v[2:3], v55 offset:211 +; ALIGNED-NEXT: flat_store_byte v[2:3], v37 offset:210 +; ALIGNED-NEXT: flat_store_byte v[2:3], v65 offset:214 +; ALIGNED-NEXT: flat_store_byte v[2:3], v67 offset:212 +; ALIGNED-NEXT: flat_store_byte 
v[2:3], v49 offset:218 +; ALIGNED-NEXT: flat_store_byte v[2:3], v48 offset:219 +; ALIGNED-NEXT: flat_store_byte v[2:3], v53 offset:217 +; ALIGNED-NEXT: flat_store_byte v[2:3], v39 offset:223 +; ALIGNED-NEXT: flat_store_byte v[2:3], v51 offset:221 +; ALIGNED-NEXT: flat_store_byte v[2:3], v50 offset:222 +; ALIGNED-NEXT: flat_store_byte v[2:3], v64 offset:220 +; ALIGNED-NEXT: flat_store_byte v[2:3], v66 offset:216 +; ALIGNED-NEXT: flat_store_byte v[2:3], v38 offset:208 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1464 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:472 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1460 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:476 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1456 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:468 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1452 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:464 +; ALIGNED-NEXT: flat_store_byte v[2:3], v68 offset:202 +; ALIGNED-NEXT: flat_store_byte v[2:3], v71 offset:203 +; ALIGNED-NEXT: flat_store_byte v[2:3], v69 offset:201 +; ALIGNED-NEXT: flat_store_byte v[2:3], v80 offset:207 +; ALIGNED-NEXT: flat_store_byte v[2:3], v81 offset:205 +; ALIGNED-NEXT: flat_store_byte v[2:3], v82 offset:206 +; ALIGNED-NEXT: flat_store_byte v[2:3], v83 offset:204 +; ALIGNED-NEXT: flat_store_byte v[2:3], v70 offset:200 +; ALIGNED-NEXT: flat_store_byte v[2:3], v87 offset:194 +; ALIGNED-NEXT: flat_store_byte v[2:3], v86 offset:195 +; ALIGNED-NEXT: flat_store_byte v[2:3], v98 offset:193 +; ALIGNED-NEXT: flat_store_byte v[2:3], v85 offset:199 +; ALIGNED-NEXT: flat_store_byte v[2:3], v97 offset:197 +; ALIGNED-NEXT: 
flat_store_byte v[2:3], v96 offset:198 +; ALIGNED-NEXT: flat_store_byte v[2:3], v99 offset:196 +; ALIGNED-NEXT: flat_store_byte v[2:3], v100 offset:192 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1448 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:552 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1444 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:556 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1440 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:548 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1436 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:544 +; ALIGNED-NEXT: flat_store_byte v[2:3], v102 offset:186 +; ALIGNED-NEXT: flat_store_byte v[2:3], v112 offset:187 +; ALIGNED-NEXT: flat_store_byte v[2:3], v103 offset:185 +; ALIGNED-NEXT: flat_store_byte v[2:3], v114 offset:191 +; ALIGNED-NEXT: flat_store_byte v[2:3], v115 offset:189 +; ALIGNED-NEXT: flat_store_byte v[2:3], v116 offset:190 +; ALIGNED-NEXT: flat_store_byte v[2:3], v117 offset:188 +; ALIGNED-NEXT: flat_store_byte v[2:3], v113 offset:184 +; ALIGNED-NEXT: flat_store_byte v[2:3], v119 offset:178 +; ALIGNED-NEXT: flat_store_byte v[2:3], v40 offset:179 +; ALIGNED-NEXT: flat_store_byte v[2:3], v44 offset:177 +; ALIGNED-NEXT: flat_store_byte v[2:3], v118 offset:183 +; ALIGNED-NEXT: flat_store_byte v[2:3], v41 offset:181 +; ALIGNED-NEXT: flat_store_byte v[2:3], v42 offset:182 +; ALIGNED-NEXT: flat_store_byte v[2:3], v45 offset:180 +; ALIGNED-NEXT: flat_store_byte v[2:3], v46 offset:176 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1432 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 offset:568 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1428 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:572 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1424 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:564 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1420 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:560 +; ALIGNED-NEXT: flat_store_byte v[2:3], v47 offset:170 +; ALIGNED-NEXT: flat_store_byte v[2:3], v58 offset:171 +; ALIGNED-NEXT: flat_store_byte v[2:3], v56 offset:169 +; ALIGNED-NEXT: flat_store_byte v[2:3], v60 offset:175 +; ALIGNED-NEXT: flat_store_byte v[2:3], v61 offset:173 +; ALIGNED-NEXT: flat_store_byte v[2:3], v62 offset:174 +; ALIGNED-NEXT: flat_store_byte v[2:3], v63 offset:172 +; ALIGNED-NEXT: flat_store_byte v[2:3], v59 offset:168 +; ALIGNED-NEXT: flat_store_byte v[2:3], v73 offset:162 +; ALIGNED-NEXT: flat_store_byte v[2:3], v74 offset:163 +; ALIGNED-NEXT: flat_store_byte v[2:3], v79 offset:161 +; ALIGNED-NEXT: flat_store_byte v[2:3], v72 offset:167 +; ALIGNED-NEXT: flat_store_byte v[2:3], v75 offset:165 +; ALIGNED-NEXT: flat_store_byte v[2:3], v77 offset:166 +; ALIGNED-NEXT: flat_store_byte v[2:3], v88 offset:164 +; ALIGNED-NEXT: flat_store_byte v[2:3], v89 offset:160 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1408 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:520 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1400 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:524 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1392 ; 4-byte 
Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:516 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1384 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:512 +; ALIGNED-NEXT: flat_store_byte v[2:3], v92 offset:154 +; ALIGNED-NEXT: flat_store_byte v[2:3], v106 offset:155 +; ALIGNED-NEXT: flat_store_byte v[2:3], v94 offset:153 +; ALIGNED-NEXT: flat_store_byte v[2:3], v109 offset:159 +; ALIGNED-NEXT: flat_store_byte v[2:3], v111 offset:157 +; ALIGNED-NEXT: flat_store_byte v[2:3], v120 offset:158 +; ALIGNED-NEXT: flat_store_byte v[2:3], v122 offset:156 +; ALIGNED-NEXT: flat_store_byte v[2:3], v105 offset:152 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1356 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:146 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1348 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:147 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1340 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:145 +; ALIGNED-NEXT: flat_store_byte v[2:3], v125 offset:151 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1368 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:149 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1372 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:150 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1352 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:148 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1332 ; 4-byte Folded 
Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:144 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1328 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:536 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1312 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:540 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1292 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:532 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1284 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:528 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1324 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:138 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1320 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:139 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1316 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:137 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1304 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:143 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1296 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:141 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1300 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:142 +; ALIGNED-NEXT: 
buffer_load_dword v0, off, s[0:3], s32 offset:1288 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:140 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1308 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:136 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1268 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:130 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1260 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:131 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1256 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:129 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1280 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:135 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1272 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:133 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1276 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:134 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1264 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:132 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1252 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:128 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1248 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:616 
+; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1232 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:620 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:612 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:608 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1244 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:122 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1240 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:123 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1236 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:121 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1224 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:127 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1216 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:125 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:126 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:124 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:120 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1188 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:114 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1180 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:115 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1176 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:113 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1200 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:119 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1192 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:117 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1196 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:118 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1184 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:116 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:112 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:632 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1152 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:636 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Reload +; 
ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:628 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:624 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1164 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:106 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1160 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:107 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:105 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1144 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:111 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1136 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:109 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1140 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:110 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:108 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:104 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:98 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 
offset:1100 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:99 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:97 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:103 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:101 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:102 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:100 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:96 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:584 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:588 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:580 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:576 +; 
ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:90 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:91 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:89 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:95 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:93 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:94 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:92 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:88 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:82 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:83 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:81 +; 
ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:87 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:85 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:86 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:84 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:80 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:600 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:604 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:596 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:976 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:592 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:74 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; 
ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:75 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:996 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:73 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:980 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:79 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:968 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:77 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:972 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:78 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:964 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:76 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:72 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:956 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:66 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:948 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:67 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:936 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:65 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:960 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:71 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:944 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: 
flat_store_byte v[2:3], v0 offset:69 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:952 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:70 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:940 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:68 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:924 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:64 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:932 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:680 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:928 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:684 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:920 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:676 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:672 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:900 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:61 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:912 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:58 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:908 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:59 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:904 ; 4-byte Folded Reload +; ALIGNED-NEXT: 
s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:57 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:896 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:63 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:888 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:62 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:884 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:60 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:892 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:56 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:876 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:53 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:860 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:50 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:856 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:51 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:844 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:49 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:872 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:55 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:864 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:54 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:848 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) 
+; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:52 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:836 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:48 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:880 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:700 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:868 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:696 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:852 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:692 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:840 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:688 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:832 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:43 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:828 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:42 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:816 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:41 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:812 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:40 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:47 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:820 ; 4-byte Folded Reload 
+; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:46 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:808 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:45 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:804 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:44 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:792 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:35 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:788 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:34 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:772 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:33 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:768 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:32 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:784 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:39 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:776 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:38 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:764 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:37 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:760 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:36 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:800 ; 4-byte Folded Reload +; ALIGNED-NEXT: 
s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:648 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:796 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:652 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:644 +; ALIGNED-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:640 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:756 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:26 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:752 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:27 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:748 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:25 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:744 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:31 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:736 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:29 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:740 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:30 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:728 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:28 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:732 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:24 +; 
ALIGNED-NEXT: flat_store_byte v[2:3], v43 offset:18 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1416 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:19 +; ALIGNED-NEXT: flat_store_byte v[2:3], v57 offset:17 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:724 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:23 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:716 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:21 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:720 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:22 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:20 +; ALIGNED-NEXT: flat_store_byte v[2:3], v78 offset:16 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1412 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:664 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1404 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:668 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1396 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:660 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1388 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:656 +; ALIGNED-NEXT: flat_store_byte v[2:3], v90 offset:10 +; ALIGNED-NEXT: flat_store_byte v[2:3], v93 offset:11 +; ALIGNED-NEXT: flat_store_byte v[2:3], v107 
offset:13 +; ALIGNED-NEXT: flat_store_byte v[2:3], v91 offset:9 +; ALIGNED-NEXT: flat_store_byte v[2:3], v108 offset:15 +; ALIGNED-NEXT: flat_store_byte v[2:3], v110 offset:14 +; ALIGNED-NEXT: flat_store_byte v[2:3], v121 offset:12 +; ALIGNED-NEXT: flat_store_byte v[2:3], v95 offset:8 +; ALIGNED-NEXT: flat_store_byte v[2:3], v127 offset:2 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1360 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:3 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1344 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:1 +; ALIGNED-NEXT: flat_store_byte v[2:3], v124 offset:7 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1376 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:5 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1380 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:6 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1364 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 offset:4 +; ALIGNED-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:1336 ; 4-byte Folded Reload +; ALIGNED-NEXT: s_waitcnt vmcnt(0) +; ALIGNED-NEXT: flat_store_byte v[2:3], v0 +; ALIGNED-NEXT: s_cbranch_scc0 .LBB9_4 +; ALIGNED-NEXT: .LBB9_5: ; %Flow11 +; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; ALIGNED-NEXT: s_clause 0x2f +; ALIGNED-NEXT: buffer_load_dword v127, off, s[0:3], s32 +; ALIGNED-NEXT: buffer_load_dword v126, off, s[0:3], s32 offset:4 +; ALIGNED-NEXT: buffer_load_dword v125, off, s[0:3], s32 offset:8 +; ALIGNED-NEXT: buffer_load_dword v124, off, s[0:3], s32 offset:12 +; ALIGNED-NEXT: buffer_load_dword v123, off, s[0:3], s32 offset:16 +; ALIGNED-NEXT: buffer_load_dword v122, off, 
s[0:3], s32 offset:20 +; ALIGNED-NEXT: buffer_load_dword v121, off, s[0:3], s32 offset:24 +; ALIGNED-NEXT: buffer_load_dword v120, off, s[0:3], s32 offset:28 +; ALIGNED-NEXT: buffer_load_dword v111, off, s[0:3], s32 offset:32 +; ALIGNED-NEXT: buffer_load_dword v110, off, s[0:3], s32 offset:36 +; ALIGNED-NEXT: buffer_load_dword v109, off, s[0:3], s32 offset:40 +; ALIGNED-NEXT: buffer_load_dword v108, off, s[0:3], s32 offset:44 +; ALIGNED-NEXT: buffer_load_dword v107, off, s[0:3], s32 offset:48 +; ALIGNED-NEXT: buffer_load_dword v106, off, s[0:3], s32 offset:52 +; ALIGNED-NEXT: buffer_load_dword v105, off, s[0:3], s32 offset:56 +; ALIGNED-NEXT: buffer_load_dword v104, off, s[0:3], s32 offset:60 +; ALIGNED-NEXT: buffer_load_dword v95, off, s[0:3], s32 offset:64 +; ALIGNED-NEXT: buffer_load_dword v94, off, s[0:3], s32 offset:68 +; ALIGNED-NEXT: buffer_load_dword v93, off, s[0:3], s32 offset:72 +; ALIGNED-NEXT: buffer_load_dword v92, off, s[0:3], s32 offset:76 +; ALIGNED-NEXT: buffer_load_dword v91, off, s[0:3], s32 offset:80 +; ALIGNED-NEXT: buffer_load_dword v90, off, s[0:3], s32 offset:84 +; ALIGNED-NEXT: buffer_load_dword v89, off, s[0:3], s32 offset:88 +; ALIGNED-NEXT: buffer_load_dword v88, off, s[0:3], s32 offset:92 +; ALIGNED-NEXT: buffer_load_dword v79, off, s[0:3], s32 offset:96 +; ALIGNED-NEXT: buffer_load_dword v78, off, s[0:3], s32 offset:100 +; ALIGNED-NEXT: buffer_load_dword v77, off, s[0:3], s32 offset:104 +; ALIGNED-NEXT: buffer_load_dword v76, off, s[0:3], s32 offset:108 +; ALIGNED-NEXT: buffer_load_dword v75, off, s[0:3], s32 offset:112 +; ALIGNED-NEXT: buffer_load_dword v74, off, s[0:3], s32 offset:116 +; ALIGNED-NEXT: buffer_load_dword v73, off, s[0:3], s32 offset:120 +; ALIGNED-NEXT: buffer_load_dword v72, off, s[0:3], s32 offset:124 +; ALIGNED-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:128 +; ALIGNED-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:132 +; ALIGNED-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:136 +; 
ALIGNED-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:140 +; ALIGNED-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:144 +; ALIGNED-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:148 +; ALIGNED-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:152 +; ALIGNED-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:156 +; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:160 +; ALIGNED-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:164 +; ALIGNED-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:168 +; ALIGNED-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:172 +; ALIGNED-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:176 +; ALIGNED-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:180 +; ALIGNED-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:184 +; ALIGNED-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:188 +; ALIGNED-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; ALIGNED-NEXT: s_setpc_b64 s[30:31] +; +; UNROLL3-LABEL: memmove_p0_p5_sz2048: +; UNROLL3: ; %bb.0: ; %entry +; UNROLL3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNROLL3-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1] +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0 +; UNROLL3-NEXT: s_mov_b32 s6, exec_lo +; UNROLL3-NEXT: v_cndmask_b32_e32 v3, -1, v0, vcc_lo +; UNROLL3-NEXT: v_cmpx_ge_u32_e64 v2, v3 +; UNROLL3-NEXT: s_xor_b32 s6, exec_lo, s6 +; UNROLL3-NEXT: s_cbranch_execz .LBB9_4 +; UNROLL3-NEXT: ; %bb.1: ; %memmove_fwd_loop.preheader +; UNROLL3-NEXT: v_mov_b32_e32 v3, v2 +; UNROLL3-NEXT: s_inst_prefetch 0x1 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB9_2: ; %memmove_fwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: s_clause 0xb +; UNROLL3-NEXT: buffer_load_dword v4, v3, s[0:3], 0 offen +; UNROLL3-NEXT: buffer_load_dword v5, v3, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: buffer_load_dword v6, v3, s[0:3], 0 offen offset:8 +; UNROLL3-NEXT: buffer_load_dword v7, v3, s[0:3], 0 offen offset:12 +; UNROLL3-NEXT: buffer_load_dword v8, v3, 
s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: buffer_load_dword v9, v3, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: buffer_load_dword v10, v3, s[0:3], 0 offen offset:24 +; UNROLL3-NEXT: buffer_load_dword v11, v3, s[0:3], 0 offen offset:28 +; UNROLL3-NEXT: buffer_load_dword v12, v3, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: buffer_load_dword v13, v3, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: buffer_load_dword v14, v3, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: buffer_load_dword v15, v3, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: v_add_co_u32 v16, vcc_lo, v0, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: s_add_u32 s4, s4, 48 +; UNROLL3-NEXT: v_add_nc_u32_e32 v3, 48, v3 +; UNROLL3-NEXT: s_addc_u32 s5, s5, 0 +; UNROLL3-NEXT: s_waitcnt vmcnt(4) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[8:11] offset:16 +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[4:7] +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[16:17], v[12:15] offset:32 +; UNROLL3-NEXT: s_cmp_lg_u64 s[4:5], 0x7e0 +; UNROLL3-NEXT: s_cbranch_scc1 .LBB9_2 +; UNROLL3-NEXT: ; %bb.3: ; %memmove_fwd_residual +; UNROLL3-NEXT: s_inst_prefetch 0x2 +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:2020 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:2016 +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:2044 +; UNROLL3-NEXT: ; implicit-def: $vgpr2 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: 
flat_store_dwordx4 v[0:1], v[3:6] offset:2032 +; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1 +; UNROLL3-NEXT: .LBB9_4: ; %Flow8 +; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6 +; UNROLL3-NEXT: s_cbranch_execz .LBB9_7 +; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:2032 +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:2036 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:2040 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:2044 +; UNROLL3-NEXT: s_movk_i32 s6, 0xffd0 +; UNROLL3-NEXT: s_mov_b64 s[4:5], 0x7b0 +; UNROLL3-NEXT: s_mov_b32 s7, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:2032 +; UNROLL3-NEXT: s_clause 0x3 +; UNROLL3-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:2016 +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:2020 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:2024 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:2028 +; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v2 +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:2016 +; UNROLL3-NEXT: s_inst_prefetch 0x1 +; UNROLL3-NEXT: .p2align 6 +; UNROLL3-NEXT: .LBB9_6: ; %memmove_bwd_loop +; UNROLL3-NEXT: ; =>This Inner Loop Header: Depth=1 +; UNROLL3-NEXT: s_clause 0xb +; UNROLL3-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen +; UNROLL3-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 +; UNROLL3-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 +; UNROLL3-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 +; UNROLL3-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 +; UNROLL3-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 +; UNROLL3-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 +; UNROLL3-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 +; 
UNROLL3-NEXT: buffer_load_dword v11, v2, s[0:3], 0 offen offset:32 +; UNROLL3-NEXT: buffer_load_dword v12, v2, s[0:3], 0 offen offset:36 +; UNROLL3-NEXT: buffer_load_dword v13, v2, s[0:3], 0 offen offset:40 +; UNROLL3-NEXT: buffer_load_dword v14, v2, s[0:3], 0 offen offset:44 +; UNROLL3-NEXT: v_add_co_u32 v15, vcc_lo, v0, s4 +; UNROLL3-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, s5, v1, vcc_lo +; UNROLL3-NEXT: v_subrev_nc_u32_e32 v2, 48, v2 +; UNROLL3-NEXT: s_add_u32 s4, s4, 0xffffffd0 +; UNROLL3-NEXT: s_addc_u32 s5, s5, -1 +; UNROLL3-NEXT: s_waitcnt vmcnt(4) +; UNROLL3-NEXT: flat_store_dwordx4 v[15:16], v[7:10] offset:16 +; UNROLL3-NEXT: flat_store_dwordx4 v[15:16], v[3:6] +; UNROLL3-NEXT: s_waitcnt vmcnt(0) +; UNROLL3-NEXT: flat_store_dwordx4 v[15:16], v[11:14] offset:32 +; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] +; UNROLL3-NEXT: s_cbranch_scc0 .LBB9_6 +; UNROLL3-NEXT: .LBB9_7: ; %Flow9 +; UNROLL3-NEXT: s_inst_prefetch 0x2 +; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; UNROLL3-NEXT: s_waitcnt lgkmcnt(0) +; UNROLL3-NEXT: s_setpc_b64 s[30:31] +entry: + tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 2048, i1 false) + ret void +} + + +declare void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2 +declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2 +declare void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 +declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void 
@llvm.memmove.p0.p0.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(0) nocapture readonly, i64, i1 immarg) #2 +declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #2 +declare void @llvm.memmove.p0.p4.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #2 +declare void @llvm.memmove.p5.p5.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memmove.p0.p5.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #2 + +attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } From 86f76c3b171f95fd0560339f2ad0f4449277cf8d Mon Sep 17 00:00:00 2001 From: Jack Styles Date: Mon, 28 Oct 2024 08:22:38 +0000 Subject: [PATCH 139/425] [AArch64][Libunwind] Add Support for FEAT_PAuthLR DWARF Instruction (#112171) As part of FEAT_PAuthLR, a new DWARF Frame Instruction was introduced, `DW_CFA_AARCH64_negate_ra_state_with_pc`. This instructs Libunwind that the PC has been used with the signing instruction. This change includes three commits - Libunwind support for the newly introduced DWARF Instruction - CodeGen Support for the DWARF Instructions - Reversing the changes made in #96377. Due to `DW_CFA_AARCH64_negate_ra_state_with_pc`'s requirements to be placed immediately after the signing instruction, this would mean the CFI Instruction location was not consistent with the generated location when not using FEAT_PAuthLR. The commit reverses the changes and makes the location consistent across the different branch protection options. While this does have a code size effect, this is a negligible one. 
For the ABI information, see here: https://github.com/ARM-software/abi-aa/blob/853286c7ab66048e4b819682ce17f567b77a0291/aadwarf64/aadwarf64.rst#id23 --- libunwind/src/DwarfInstructions.hpp | 59 ++++-- libunwind/src/DwarfParser.hpp | 21 +++ libunwind/src/dwarf2.h | 58 +++--- llvm/include/llvm/BinaryFormat/Dwarf.def | 1 + llvm/include/llvm/MC/MCDwarf.h | 7 + llvm/include/llvm/MC/MCStreamer.h | 1 + .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 3 + llvm/lib/CodeGen/CFIInstrInserter.cpp | 1 + llvm/lib/CodeGen/MIRParser/MILexer.cpp | 2 + llvm/lib/CodeGen/MIRParser/MILexer.h | 1 + llvm/lib/CodeGen/MIRParser/MIParser.cpp | 5 + llvm/lib/CodeGen/MachineOperand.cpp | 4 + llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 24 +++ llvm/lib/MC/MCAsmStreamer.cpp | 7 + llvm/lib/MC/MCDwarf.cpp | 4 + llvm/lib/MC/MCStreamer.cpp | 10 ++ .../Target/AArch64/AArch64FrameLowering.cpp | 5 +- .../lib/Target/AArch64/AArch64PointerAuth.cpp | 74 ++++---- .../AArch64/AsmParser/AArch64AsmParser.cpp | 10 ++ .../machine-outliner-retaddr-sign-cfi.ll | 3 +- ...tliner-retaddr-sign-diff-scope-same-key.ll | 6 +- .../machine-outliner-retaddr-sign-non-leaf.ll | 121 +++++++++++-- .../machine-outliner-retaddr-sign-regsave.mir | 3 +- ...tliner-retaddr-sign-same-scope-diff-key.ll | 139 +++++++++++--- ...machine-outliner-retaddr-sign-subtarget.ll | 9 +- .../machine-outliner-retaddr-sign-thunk.ll | 169 ++++++++++++++---- .../AArch64/pacbti-llvm-generated-funcs-2.ll | 3 +- ...sign-return-address-cfi-negate-ra-state.ll | 11 +- .../AArch64/sign-return-address-pauth-lr.ll | 162 +++++++++++------ .../CodeGen/AArch64/sign-return-address.ll | 18 +- .../MIR/AArch64/return-address-signing.mir | 23 +++ .../MC/AArch64/directives-case_insensitive.s | 2 + .../test/MC/AArch64/negate_ra_state_with_pc.s | 7 + .../DebugInfo/DWARF/DWARFDebugFrameTest.cpp | 1 + 34 files changed, 733 insertions(+), 241 deletions(-) create mode 100644 llvm/test/MC/AArch64/negate_ra_state_with_pc.s diff --git a/libunwind/src/DwarfInstructions.hpp 
b/libunwind/src/DwarfInstructions.hpp index bd9ece60ee5881a..e7be0d6d5d63549 100644 --- a/libunwind/src/DwarfInstructions.hpp +++ b/libunwind/src/DwarfInstructions.hpp @@ -74,8 +74,10 @@ class DwarfInstructions { __builtin_unreachable(); } #if defined(_LIBUNWIND_TARGET_AARCH64) - static bool getRA_SIGN_STATE(A &addressSpace, R registers, pint_t cfa, - PrologInfo &prolog); + static bool isReturnAddressSigned(A &addressSpace, R registers, pint_t cfa, + PrologInfo &prolog); + static bool isReturnAddressSignedWithPC(A &addressSpace, R registers, + pint_t cfa, PrologInfo &prolog); #endif }; @@ -173,8 +175,9 @@ v128 DwarfInstructions::getSavedVectorRegister( } #if defined(_LIBUNWIND_TARGET_AARCH64) template -bool DwarfInstructions::getRA_SIGN_STATE(A &addressSpace, R registers, - pint_t cfa, PrologInfo &prolog) { +bool DwarfInstructions::isReturnAddressSigned(A &addressSpace, + R registers, pint_t cfa, + PrologInfo &prolog) { pint_t raSignState; auto regloc = prolog.savedRegisters[UNW_AARCH64_RA_SIGN_STATE]; if (regloc.location == CFI_Parser::kRegisterUnused) @@ -185,6 +188,22 @@ bool DwarfInstructions::getRA_SIGN_STATE(A &addressSpace, R registers, // Only bit[0] is meaningful. return raSignState & 0x01; } + +template +bool DwarfInstructions::isReturnAddressSignedWithPC(A &addressSpace, + R registers, + pint_t cfa, + PrologInfo &prolog) { + pint_t raSignState; + auto regloc = prolog.savedRegisters[UNW_AARCH64_RA_SIGN_STATE]; + if (regloc.location == CFI_Parser::kRegisterUnused) + raSignState = static_cast(regloc.value); + else + raSignState = getSavedRegister(addressSpace, registers, cfa, regloc); + + // Only bit[1] is meaningful. + return raSignState & 0x02; +} #endif template @@ -288,7 +307,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // restored. autia1716 is used instead of autia as autia1716 assembles // to a NOP on pre-v8.3a architectures. 
if ((R::getArch() == REGISTERS_ARM64) && - getRA_SIGN_STATE(addressSpace, registers, cfa, prolog) && + isReturnAddressSigned(addressSpace, registers, cfa, prolog) && returnAddress != 0) { #if !defined(_LIBUNWIND_IS_NATIVE_ONLY) return UNW_ECROSSRASIGNING; @@ -296,13 +315,29 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, register unsigned long long x17 __asm("x17") = returnAddress; register unsigned long long x16 __asm("x16") = cfa; - // These are the autia1716/autib1716 instructions. The hint instructions - // are used here as gcc does not assemble autia1716/autib1716 for pre - // armv8.3a targets. - if (cieInfo.addressesSignedWithBKey) - asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716 - else - asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716 + // We use the hint versions of the authentication instructions below to + // ensure they're assembled by the compiler even for targets with no + // FEAT_PAuth/FEAT_PAuth_LR support. + if (isReturnAddressSignedWithPC(addressSpace, registers, cfa, prolog)) { + register unsigned long long x15 __asm("x15") = + prolog.ptrAuthDiversifier; + if (cieInfo.addressesSignedWithBKey) { + asm("hint 0x27\n\t" // pacm + "hint 0xe" + : "+r"(x17) + : "r"(x16), "r"(x15)); // autib1716 + } else { + asm("hint 0x27\n\t" // pacm + "hint 0xc" + : "+r"(x17) + : "r"(x16), "r"(x15)); // autia1716 + } + } else { + if (cieInfo.addressesSignedWithBKey) + asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716 + else + asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716 + } returnAddress = x17; #endif } diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp index 0682942ce13799e..7e85025dd054d5d 100644 --- a/libunwind/src/DwarfParser.hpp +++ b/libunwind/src/DwarfParser.hpp @@ -91,6 +91,9 @@ class CFI_Parser { int64_t cfaExpression; // CFA = expression uint32_t spExtraArgSize; RegisterLocation savedRegisters[kMaxRegisterNumber + 1]; +#if defined(_LIBUNWIND_TARGET_AARCH64) + pint_t ptrAuthDiversifier; +#endif 
enum class InitializeTime { kLazy, kNormal }; // When saving registers, this data structure is lazily initialized. @@ -799,6 +802,24 @@ bool CFI_Parser::parseFDEInstructions(A &addressSpace, } break; +#if defined(_LIBUNWIND_TARGET_AARCH64) + case DW_CFA_AARCH64_negate_ra_state_with_pc: { + int64_t value = + results->savedRegisters[UNW_AARCH64_RA_SIGN_STATE].value ^ 0x3; + results->setRegisterValue(UNW_AARCH64_RA_SIGN_STATE, value, + initialState); + // When calculating the value of the PC, it is assumed that the CFI + // instruction is placed before the signing instruction, however it is + // placed after. Because of this, we need to take into account the CFI + // instruction is one instruction call later than expected, and reduce + // the PC value by 4 bytes to compensate. + results->ptrAuthDiversifier = fdeInfo.pcStart + codeOffset - 0x4; + _LIBUNWIND_TRACE_DWARF( + "DW_CFA_AARCH64_negate_ra_state_with_pc(pc=0x%" PRIx64 ")\n", + static_cast(results->ptrAuthDiversifier)); + } break; +#endif + #else (void)arch; #endif diff --git a/libunwind/src/dwarf2.h b/libunwind/src/dwarf2.h index 174277d5a795084..68ad882347203ca 100644 --- a/libunwind/src/dwarf2.h +++ b/libunwind/src/dwarf2.h @@ -18,43 +18,43 @@ // DWARF unwind instructions enum { - DW_CFA_nop = 0x0, - DW_CFA_set_loc = 0x1, - DW_CFA_advance_loc1 = 0x2, - DW_CFA_advance_loc2 = 0x3, - DW_CFA_advance_loc4 = 0x4, - DW_CFA_offset_extended = 0x5, - DW_CFA_restore_extended = 0x6, - DW_CFA_undefined = 0x7, - DW_CFA_same_value = 0x8, - DW_CFA_register = 0x9, - DW_CFA_remember_state = 0xA, - DW_CFA_restore_state = 0xB, - DW_CFA_def_cfa = 0xC, - DW_CFA_def_cfa_register = 0xD, - DW_CFA_def_cfa_offset = 0xE, - DW_CFA_def_cfa_expression = 0xF, - DW_CFA_expression = 0x10, + DW_CFA_nop = 0x0, + DW_CFA_set_loc = 0x1, + DW_CFA_advance_loc1 = 0x2, + DW_CFA_advance_loc2 = 0x3, + DW_CFA_advance_loc4 = 0x4, + DW_CFA_offset_extended = 0x5, + DW_CFA_restore_extended = 0x6, + DW_CFA_undefined = 0x7, + DW_CFA_same_value = 0x8, + 
DW_CFA_register = 0x9, + DW_CFA_remember_state = 0xA, + DW_CFA_restore_state = 0xB, + DW_CFA_def_cfa = 0xC, + DW_CFA_def_cfa_register = 0xD, + DW_CFA_def_cfa_offset = 0xE, + DW_CFA_def_cfa_expression = 0xF, + DW_CFA_expression = 0x10, DW_CFA_offset_extended_sf = 0x11, - DW_CFA_def_cfa_sf = 0x12, - DW_CFA_def_cfa_offset_sf = 0x13, - DW_CFA_val_offset = 0x14, - DW_CFA_val_offset_sf = 0x15, - DW_CFA_val_expression = 0x16, - DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta - DW_CFA_offset = 0x80, // high 2 bits are 0x2, lower 6 bits are register - DW_CFA_restore = 0xC0, // high 2 bits are 0x3, lower 6 bits are register + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta + DW_CFA_offset = 0x80, // high 2 bits are 0x2, lower 6 bits are register + DW_CFA_restore = 0xC0, // high 2 bits are 0x3, lower 6 bits are register // GNU extensions - DW_CFA_GNU_window_save = 0x2D, - DW_CFA_GNU_args_size = 0x2E, + DW_CFA_GNU_window_save = 0x2D, + DW_CFA_GNU_args_size = 0x2E, DW_CFA_GNU_negative_offset_extended = 0x2F, // AARCH64 extensions - DW_CFA_AARCH64_negate_ra_state = 0x2D + DW_CFA_AARCH64_negate_ra_state_with_pc = 0x2C, + DW_CFA_AARCH64_negate_ra_state = 0x2D }; - // FSF exception handling Pointer-Encoding constants // Used in CFI augmentation by GCC enum { diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index d55947fc5103ac5..9336f2a454ae47b 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -1238,6 +1238,7 @@ HANDLE_DW_CFA(0x16, val_expression) // Vendor extensions: HANDLE_DW_CFA_PRED(0x1d, MIPS_advance_loc8, SELECT_MIPS64) HANDLE_DW_CFA_PRED(0x2d, GNU_window_save, SELECT_SPARC) +HANDLE_DW_CFA_PRED(0x2c, AARCH64_negate_ra_state_with_pc, SELECT_AARCH64) HANDLE_DW_CFA_PRED(0x2d, 
AARCH64_negate_ra_state, SELECT_AARCH64) HANDLE_DW_CFA_PRED(0x2e, GNU_args_size, SELECT_X86) // Heterogeneous Debugging Extension defined at diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h index bea79545d1ab96e..1392336968e74ab 100644 --- a/llvm/include/llvm/MC/MCDwarf.h +++ b/llvm/include/llvm/MC/MCDwarf.h @@ -515,6 +515,7 @@ class MCCFIInstruction { OpRegister, OpWindowSave, OpNegateRAState, + OpNegateRAStateWithPC, OpGnuArgsSize, OpLabel, }; @@ -642,6 +643,12 @@ class MCCFIInstruction { return MCCFIInstruction(OpNegateRAState, L, 0, INT64_C(0), Loc); } + /// .cfi_negate_ra_state_with_pc AArch64 negate RA state with PC. + static MCCFIInstruction createNegateRAStateWithPC(MCSymbol *L, + SMLoc Loc = {}) { + return MCCFIInstruction(OpNegateRAStateWithPC, L, 0, INT64_C(0), Loc); + } + /// .cfi_restore says that the rule for Register is now the same as it /// was at the beginning of the function, after all initial instructions added /// by .cfi_startproc were executed. 
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 707aecc5dc578e0..a376ba810ba5152 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -1022,6 +1022,7 @@ class MCStreamer { SMLoc Loc = {}); virtual void emitCFIWindowSave(SMLoc Loc = {}); virtual void emitCFINegateRAState(SMLoc Loc = {}); + virtual void emitCFINegateRAStateWithPC(SMLoc Loc = {}); virtual void emitCFILabelDirective(SMLoc Loc, StringRef Name); virtual void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc()); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 21d0d070c247f48..daad82d26da652e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -236,6 +236,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpNegateRAState: OutStreamer->emitCFINegateRAState(Loc); break; + case MCCFIInstruction::OpNegateRAStateWithPC: + OutStreamer->emitCFINegateRAStateWithPC(Loc); + break; case MCCFIInstruction::OpSameValue: OutStreamer->emitCFISameValue(Inst.getRegister(), Loc); break; diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index f5bedc7b8ecdfc2..4217ec6a1cca8a9 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -260,6 +260,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) { case MCCFIInstruction::OpEscape: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: + case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpGnuArgsSize: case MCCFIInstruction::OpLabel: break; diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 0809f88fde56b1c..5a3806ce57335ae 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ 
-238,6 +238,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("window_save", MIToken::kw_cfi_window_save) .Case("negate_ra_sign_state", MIToken::kw_cfi_aarch64_negate_ra_sign_state) + .Case("negate_ra_sign_state_with_pc", + MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc) .Case("blockaddress", MIToken::kw_blockaddress) .Case("intrinsic", MIToken::kw_intrinsic) .Case("target-index", MIToken::kw_target_index) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 22547483a8a86be..3931da3eaae1d3a 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -96,6 +96,7 @@ struct MIToken { kw_cfi_undefined, kw_cfi_window_save, kw_cfi_aarch64_negate_ra_sign_state, + kw_cfi_aarch64_negate_ra_sign_state_with_pc, kw_blockaddress, kw_intrinsic, kw_target_index, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 7aaa0f409d5ef9d..45847b5830da656 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -2576,6 +2576,10 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) { case MIToken::kw_cfi_aarch64_negate_ra_sign_state: CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); break; + case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc: + CFIIndex = + MF.addFrameInst(MCCFIInstruction::createNegateRAStateWithPC(nullptr)); + break; case MIToken::kw_cfi_escape: { std::string Values; if (parseCFIEscapeValues(Values)) @@ -2931,6 +2935,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, case MIToken::kw_cfi_undefined: case MIToken::kw_cfi_window_save: case MIToken::kw_cfi_aarch64_negate_ra_sign_state: + case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc: return parseCFIOperand(Dest); case MIToken::kw_blockaddress: return parseBlockAddressOperand(Dest); diff --git a/llvm/lib/CodeGen/MachineOperand.cpp 
b/llvm/lib/CodeGen/MachineOperand.cpp index c0e004555de9598..620779e2dec7086 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -768,6 +768,10 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, if (MCSymbol *Label = CFI.getLabel()) MachineOperand::printSymbol(OS, *Label); break; + case MCCFIInstruction::OpNegateRAStateWithPC: + OS << "negate_ra_sign_state_with_pc "; + if (MCSymbol *Label = CFI.getLabel()) + MachineOperand::printSymbol(OS, *Label); default: // TODO: Print the other CFI Operations. OS << ""; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index aff26824dda1049..96cb86ad4c37117 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -288,6 +288,7 @@ Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset, case DW_CFA_remember_state: case DW_CFA_restore_state: case DW_CFA_GNU_window_save: + case DW_CFA_AARCH64_negate_ra_state_with_pc: // No operands addInstruction(Opcode); break; @@ -666,6 +667,28 @@ Error UnwindTable::parseRows(const CFIProgram &CFIP, UnwindRow &Row, } break; + case dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc: { + constexpr uint32_t AArch64DWARFPAuthRaState = 34; + auto LRLoc = Row.getRegisterLocations().getRegisterLocation( + AArch64DWARFPAuthRaState); + if (LRLoc) { + if (LRLoc->getLocation() == UnwindLocation::Constant) { + // Toggle the constant value of bits[1:0] from 0 to 1 or 1 to 0. 
+ LRLoc->setConstant(LRLoc->getConstant() ^ 0x3); + } else { + return createStringError( + errc::invalid_argument, + "%s encountered when existing rule for this register is not " + "a constant", + CFIP.callFrameString(Inst.Opcode).str().c_str()); + } + } else { + Row.getRegisterLocations().setRegisterLocation( + AArch64DWARFPAuthRaState, UnwindLocation::createIsConstant(0x3)); + } + break; + } + case dwarf::DW_CFA_undefined: { llvm::Expected RegNum = Inst.getOperandAsUnsigned(CFIP, 0); if (!RegNum) @@ -847,6 +870,7 @@ CFIProgram::getOperandTypes() { DECLARE_OP0(DW_CFA_remember_state); DECLARE_OP0(DW_CFA_restore_state); DECLARE_OP0(DW_CFA_GNU_window_save); + DECLARE_OP0(DW_CFA_AARCH64_negate_ra_state_with_pc); DECLARE_OP1(DW_CFA_GNU_args_size, OT_Offset); DECLARE_OP0(DW_CFA_nop); diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 31b519a3e5c56a0..b9ad0b4eac9c7ba 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -373,6 +373,7 @@ class MCAsmStreamer final : public MCStreamer { SMLoc Loc) override; void emitCFIWindowSave(SMLoc Loc) override; void emitCFINegateRAState(SMLoc Loc) override; + void emitCFINegateRAStateWithPC(SMLoc Loc) override; void emitCFIReturnColumn(int64_t Register) override; void emitCFILabelDirective(SMLoc Loc, StringRef Name) override; @@ -2145,6 +2146,12 @@ void MCAsmStreamer::emitCFINegateRAState(SMLoc Loc) { EmitEOL(); } +void MCAsmStreamer::emitCFINegateRAStateWithPC(SMLoc Loc) { + MCStreamer::emitCFINegateRAStateWithPC(Loc); + OS << "\t.cfi_negate_ra_state_with_pc"; + EmitEOL(); +} + void MCAsmStreamer::emitCFIReturnColumn(int64_t Register) { MCStreamer::emitCFIReturnColumn(Register); OS << "\t.cfi_return_column "; diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp index 8ff097f29aebd15..e058358fb8ad4bd 100644 --- a/llvm/lib/MC/MCDwarf.cpp +++ b/llvm/lib/MC/MCDwarf.cpp @@ -1381,6 +1381,10 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) { 
Streamer.emitInt8(dwarf::DW_CFA_AARCH64_negate_ra_state); return; + case MCCFIInstruction::OpNegateRAStateWithPC: + Streamer.emitInt8(dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc); + return; + case MCCFIInstruction::OpUndefined: { unsigned Reg = Instr.getRegister(); Streamer.emitInt8(dwarf::DW_CFA_undefined); diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 13b162768578c53..5474db1315f1416 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -688,6 +688,16 @@ void MCStreamer::emitCFINegateRAState(SMLoc Loc) { CurFrame->Instructions.push_back(Instruction); } +void MCStreamer::emitCFINegateRAStateWithPC(SMLoc Loc) { + MCSymbol *Label = emitCFILabel(); + MCCFIInstruction Instruction = + MCCFIInstruction::createNegateRAStateWithPC(Label, Loc); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; + CurFrame->Instructions.push_back(Instruction); +} + void MCStreamer::emitCFIReturnColumn(int64_t Register) { MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); if (!CurFrame) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index bbf2f2677954577..275070b332ac87f 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -702,7 +702,10 @@ void AArch64FrameLowering::resetCFIToInitialState( // Flip the RA sign state. if (MFI.shouldSignReturnAddress(MF)) { - CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + auto CFIInst = MFI.branchProtectionPAuthLR() + ? 
MCCFIInstruction::createNegateRAStateWithPC(nullptr) + : MCCFIInstruction::createNegateRAState(nullptr); + CFIIndex = MF.addFrameInst(CFIInst); BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); } diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp index 92ab4b5c3d251f3..2e5688cf60027ab 100644 --- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp +++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp @@ -71,6 +71,18 @@ FunctionPass *llvm::createAArch64PointerAuthPass() { char AArch64PointerAuth::ID = 0; +static void emitPACSymOffsetIntoX16(const TargetInstrInfo &TII, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + MCSymbol *PACSym) { + BuildMI(MBB, I, DL, TII.get(AArch64::ADRP), AArch64::X16) + .addSym(PACSym, AArch64II::MO_PAGE); + BuildMI(MBB, I, DL, TII.get(AArch64::ADDXri), AArch64::X16) + .addReg(AArch64::X16) + .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC) + .addImm(0); +} + // Where PAuthLR support is not known at compile time, it is supported using // PACM. PACM is in the hint space so has no effect when PAuthLR is not // supported by the hardware, but will alter the behaviour of PACI*SP, AUTI*SP @@ -81,12 +93,10 @@ static void BuildPACM(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB, const TargetInstrInfo *TII = Subtarget.getInstrInfo(); auto &MFnI = *MBB.getParent()->getInfo(); - // ADR X16, + // Offset to PAC*SP using ADRP + ADD. 
if (PACSym) { assert(Flags == MachineInstr::FrameDestroy); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADR)) - .addReg(AArch64::X16, RegState::Define) - .addSym(PACSym); + emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym); } // Only emit PACM if -mbranch-protection has +pc and the target does not @@ -95,12 +105,31 @@ static void BuildPACM(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM)).setMIFlag(Flags); } +static void emitPACCFI(const AArch64Subtarget &Subtarget, + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL, MachineInstr::MIFlag Flags, bool EmitCFI) { + if (!EmitCFI) + return; + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + auto &MF = *MBB.getParent(); + auto &MFnI = *MF.getInfo(); + + auto CFIInst = MFnI.branchProtectionPAuthLR() + ? MCCFIInstruction::createNegateRAStateWithPC(nullptr) + : MCCFIInstruction::createNegateRAState(nullptr); + + unsigned CFIIndex = MF.addFrameInst(CFIInst); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(Flags); +} + void AArch64PointerAuth::signLR(MachineFunction &MF, MachineBasicBlock::iterator MBBI) const { auto &MFnI = *MF.getInfo(); bool UseBKey = MFnI.shouldSignWithBKey(); bool EmitCFI = MFnI.needsDwarfUnwindInfo(MF); - bool EmitAsyncCFI = MFnI.needsAsyncDwarfUnwindInfo(MF); bool NeedsWinCFI = MF.hasWinCFI(); MachineBasicBlock &MBB = *MBBI->getParent(); @@ -128,6 +157,7 @@ void AArch64PointerAuth::signLR(MachineFunction &MF, : AArch64::PACIASPPC)) .setMIFlag(MachineInstr::FrameSetup) ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel()); + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI); } else { BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup); BuildMI(MBB, MBBI, DL, @@ -135,27 +165,10 @@ void AArch64PointerAuth::signLR(MachineFunction &MF, : AArch64::PACIASP)) .setMIFlag(MachineInstr::FrameSetup) ->setPreInstrSymbol(MF, 
MFnI.getSigningInstrLabel()); + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI); } - if (EmitCFI) { - if (!EmitAsyncCFI) { - // Reduce the size of the generated call frame information for synchronous - // CFI by bundling the new CFI instruction with others in the prolog, so - // that no additional DW_CFA_advance_loc is needed. - for (auto I = MBBI; I != MBB.end(); ++I) { - if (I->getOpcode() == TargetOpcode::CFI_INSTRUCTION && - I->getFlag(MachineInstr::FrameSetup)) { - MBBI = I; - break; - } - } - } - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } else if (NeedsWinCFI) { + if (!EmitCFI && NeedsWinCFI) { BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR)) .setMIFlag(MachineInstr::FrameSetup); } @@ -190,6 +203,7 @@ void AArch64PointerAuth::authenticateLR( !MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) { if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) { assert(PACSym && "No PAC instruction to refer to"); + emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym); BuildMI(MBB, TI, DL, TII->get(UseBKey ? AArch64::RETABSPPCi : AArch64::RETAASPPCi)) .addSym(PACSym) @@ -205,24 +219,22 @@ void AArch64PointerAuth::authenticateLR( } else { if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) { assert(PACSym && "No PAC instruction to refer to"); + emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym); BuildMI(MBB, MBBI, DL, TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi)) .addSym(PACSym) .setMIFlag(MachineInstr::FrameDestroy); + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, + EmitAsyncCFI); } else { BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym); BuildMI(MBB, MBBI, DL, TII->get(UseBKey ? 
AArch64::AUTIBSP : AArch64::AUTIASP)) .setMIFlag(MachineInstr::FrameDestroy); + emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, + EmitAsyncCFI); } - if (EmitAsyncCFI) { - unsigned CFIIndex = - MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameDestroy); - } if (NeedsWinCFI) { BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR)) .setMIFlag(MachineInstr::FrameDestroy); diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index dfc5e04110cf57b..8e267e65862210f 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -195,6 +195,7 @@ class AArch64AsmParser : public MCTargetAsmParser { bool parseDirectiveReq(StringRef Name, SMLoc L); bool parseDirectiveUnreq(SMLoc L); bool parseDirectiveCFINegateRAState(); + bool parseDirectiveCFINegateRAStateWithPC(); bool parseDirectiveCFIBKeyFrame(); bool parseDirectiveCFIMTETaggedFrame(); @@ -6975,6 +6976,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { parseDirectiveInst(Loc); else if (IDVal == ".cfi_negate_ra_state") parseDirectiveCFINegateRAState(); + else if (IDVal == ".cfi_negate_ra_state_with_pc") + parseDirectiveCFINegateRAStateWithPC(); else if (IDVal == ".cfi_b_key_frame") parseDirectiveCFIBKeyFrame(); else if (IDVal == ".cfi_mte_tagged_frame") @@ -7425,6 +7428,13 @@ bool AArch64AsmParser::parseDirectiveCFINegateRAState() { return false; } +bool AArch64AsmParser::parseDirectiveCFINegateRAStateWithPC() { + if (parseEOL()) + return true; + getStreamer().emitCFINegateRAStateWithPC(); + return false; +} + /// parseDirectiveCFIBKeyFrame /// ::= .cfi_b_key bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() { diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll 
b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll index c64b3842aa5baa5..4bbbe40176313a0 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll @@ -11,8 +11,7 @@ define void @a() "sign-return-address"="all" "sign-return-address-key"="b_key" { ; CHECK-NEXT: .cfi_b_key_frame ; V8A-NEXT: hint #27 ; V83A-NEXT: pacibsp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; CHECK-NEXT: .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll index 3221815da33c5e1..6a11bef08c7406b 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll @@ -7,8 +7,7 @@ define void @a() "sign-return-address"="all" { ; CHECK-LABEL: a: // @a ; V8A: hint #25 ; V83A: paciasp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; CHECK-NEXT: .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -55,8 +54,7 @@ define void @c() "sign-return-address"="all" { ; CHECK-LABEL: c: // @c ; V8A: hint #25 ; V83A: paciasp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; CHECK-NEXT .cfi_negate_ra_state %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll index d43b74b9451aae1..1e7224683c6c895 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll @@ -1,15 +1,44 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 %s -o - | \ ; RUN: FileCheck %s --check-prefixes CHECK,V8A ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 -mattr=+v8.3a %s -o - | \ ; RUN: FileCheck %s --check-prefixes CHECK,V83A define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" { -; CHECK-LABEL: a: // @a -; CHECK: .cfi_b_key_frame -; V8A-NEXT: hint #27 -; V83A-NEXT: pacibsp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; V8A-LABEL: a: +; V8A: // %bb.0: +; V8A-NEXT: .cfi_b_key_frame +; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: sub sp, sp, #32 +; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V8A-NEXT: .cfi_def_cfa_offset 32 +; V8A-NEXT: .cfi_offset w30, -16 +; V8A-NEXT: bl OUTLINED_FUNCTION_0 +; V8A-NEXT: //APP +; V8A-NEXT: mov x30, x0 +; V8A-NEXT: //NO_APP +; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V8A-NEXT: add sp, sp, #32 +; V8A-NEXT: hint #31 +; V8A-NEXT: ret +; +; V83A-LABEL: a: +; V83A: // %bb.0: +; V83A-NEXT: .cfi_b_key_frame +; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: sub sp, sp, #32 +; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V83A-NEXT: .cfi_def_cfa_offset 32 +; V83A-NEXT: .cfi_offset w30, -16 +; V83A-NEXT: bl OUTLINED_FUNCTION_0 +; V83A-NEXT: //APP +; V83A-NEXT: mov x30, x0 +; V83A-NEXT: //NO_APP +; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V83A-NEXT: add sp, sp, #32 +; V83A-NEXT: retab %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -27,12 +56,40 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" } define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" { -; CHECK-LABEL: b: // @b -; CHECK: .cfi_b_key_frame -; V8A-NEXT: hint #27 -; V83A-NEXT: pacibsp -; CHECK: .cfi_negate_ra_state -; 
CHECK-NEXT: .cfi_def_cfa_offset +; V8A-LABEL: b: +; V8A: // %bb.0: +; V8A-NEXT: .cfi_b_key_frame +; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: sub sp, sp, #32 +; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V8A-NEXT: .cfi_def_cfa_offset 32 +; V8A-NEXT: .cfi_offset w30, -16 +; V8A-NEXT: bl OUTLINED_FUNCTION_0 +; V8A-NEXT: //APP +; V8A-NEXT: mov x30, x0 +; V8A-NEXT: //NO_APP +; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V8A-NEXT: add sp, sp, #32 +; V8A-NEXT: hint #31 +; V8A-NEXT: ret +; +; V83A-LABEL: b: +; V83A: // %bb.0: +; V83A-NEXT: .cfi_b_key_frame +; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: sub sp, sp, #32 +; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V83A-NEXT: .cfi_def_cfa_offset 32 +; V83A-NEXT: .cfi_offset w30, -16 +; V83A-NEXT: bl OUTLINED_FUNCTION_0 +; V83A-NEXT: //APP +; V83A-NEXT: mov x30, x0 +; V83A-NEXT: //NO_APP +; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V83A-NEXT: add sp, sp, #32 +; V83A-NEXT: retab %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -50,12 +107,40 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key" } define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" { -; CHECK-LABEL: c: // @c -; CHECK: .cfi_b_key_frame -; V8A-NEXT: hint #27 -; V83A-NEXT: pacibsp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; V8A-LABEL: c: +; V8A: // %bb.0: +; V8A-NEXT: .cfi_b_key_frame +; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: sub sp, sp, #32 +; V8A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V8A-NEXT: .cfi_def_cfa_offset 32 +; V8A-NEXT: .cfi_offset w30, -16 +; V8A-NEXT: bl OUTLINED_FUNCTION_0 +; V8A-NEXT: //APP +; V8A-NEXT: mov x30, x0 +; V8A-NEXT: //NO_APP +; V8A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V8A-NEXT: add sp, sp, #32 +; V8A-NEXT: hint #31 +; V8A-NEXT: ret +; +; V83A-LABEL: c: +; V83A: // 
%bb.0: +; V83A-NEXT: .cfi_b_key_frame +; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: sub sp, sp, #32 +; V83A-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; V83A-NEXT: .cfi_def_cfa_offset 32 +; V83A-NEXT: .cfi_offset w30, -16 +; V83A-NEXT: bl OUTLINED_FUNCTION_0 +; V83A-NEXT: //APP +; V83A-NEXT: mov x30, x0 +; V83A-NEXT: //NO_APP +; V83A-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; V83A-NEXT: add sp, sp, #32 +; V83A-NEXT: retab %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir index ba27d1c681e3f4d..9a983cbd6714ee0 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir @@ -82,8 +82,7 @@ body: | # CHECK: bb.0: # CHECK: frame-setup EMITBKEY # CHECK-NEXT: frame-setup PACIBSP implicit-def $lr, implicit $lr, implicit $sp -# CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state -# CHECK-NEXT: frame-setup CFI_INSTRUCTION +# CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state # CHECK-NOT: OUTLINED_FUNCTION_ # CHECK: bb.1: # CHECK-NOT: OUTLINED_FUNCTION_ diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll index 8c36ab4d8f403a1..87771f5de4f699a 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll @@ -1,14 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 %s -o - | \ ; RUN: FileCheck %s --check-prefixes CHECK,V8A ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 
-mattr=+v8.3a %s -o - | \ ; RUN: FileCheck %s --check-prefixes CHECK,V83A define void @a() "sign-return-address"="all" { -; CHECK-LABEL: a: // @a -; V8A: hint #25 -; V83A: paciasp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; V8A-LABEL: a: +; V8A: // %bb.0: +; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: sub sp, sp, #32 +; V8A-NEXT: .cfi_def_cfa_offset 32 +; V8A-NEXT: mov w8, #1 // =0x1 +; V8A-NEXT: mov w9, #2 // =0x2 +; V8A-NEXT: stp w9, w8, [sp, #24] +; V8A-NEXT: mov w9, #3 // =0x3 +; V8A-NEXT: mov w8, #4 // =0x4 +; V8A-NEXT: stp w8, w9, [sp, #16] +; V8A-NEXT: mov w9, #5 // =0x5 +; V8A-NEXT: mov w8, #6 // =0x6 +; V8A-NEXT: stp w8, w9, [sp, #8] +; V8A-NEXT: add sp, sp, #32 +; V8A-NEXT: hint #29 +; V8A-NEXT: ret +; +; V83A-LABEL: a: +; V83A: // %bb.0: +; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: sub sp, sp, #32 +; V83A-NEXT: .cfi_def_cfa_offset 32 +; V83A-NEXT: mov w8, #1 // =0x1 +; V83A-NEXT: mov w9, #2 // =0x2 +; V83A-NEXT: stp w9, w8, [sp, #24] +; V83A-NEXT: mov w9, #3 // =0x3 +; V83A-NEXT: mov w8, #4 // =0x4 +; V83A-NEXT: stp w8, w9, [sp, #16] +; V83A-NEXT: mov w9, #5 // =0x5 +; V83A-NEXT: mov w8, #6 // =0x6 +; V83A-NEXT: stp w8, w9, [sp, #8] +; V83A-NEXT: add sp, sp, #32 +; V83A-NEXT: retaa %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -21,19 +53,48 @@ define void @a() "sign-return-address"="all" { store i32 4, ptr %4, align 4 store i32 5, ptr %5, align 4 store i32 6, ptr %6, align 4 -; V8A: hint #29 -; V83A: retaa ret void -; CHECK: .cfi_endproc } define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" { -; CHECK-LABEL: b: // @b -; CHECK: .cfi_b_key_frame -; V8A-NEXT: hint #27 -; V83A-NEXT: pacibsp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; V8A-LABEL: b: +; V8A: // %bb.0: +; V8A-NEXT: .cfi_b_key_frame +; V8A-NEXT: hint #27 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: sub sp, sp, #32 +; V8A-NEXT: 
.cfi_def_cfa_offset 32 +; V8A-NEXT: mov w8, #1 // =0x1 +; V8A-NEXT: mov w9, #2 // =0x2 +; V8A-NEXT: stp w9, w8, [sp, #24] +; V8A-NEXT: mov w9, #3 // =0x3 +; V8A-NEXT: mov w8, #4 // =0x4 +; V8A-NEXT: stp w8, w9, [sp, #16] +; V8A-NEXT: mov w9, #5 // =0x5 +; V8A-NEXT: mov w8, #6 // =0x6 +; V8A-NEXT: stp w8, w9, [sp, #8] +; V8A-NEXT: add sp, sp, #32 +; V8A-NEXT: hint #31 +; V8A-NEXT: ret +; +; V83A-LABEL: b: +; V83A: // %bb.0: +; V83A-NEXT: .cfi_b_key_frame +; V83A-NEXT: pacibsp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: sub sp, sp, #32 +; V83A-NEXT: .cfi_def_cfa_offset 32 +; V83A-NEXT: mov w8, #1 // =0x1 +; V83A-NEXT: mov w9, #2 // =0x2 +; V83A-NEXT: stp w9, w8, [sp, #24] +; V83A-NEXT: mov w9, #3 // =0x3 +; V83A-NEXT: mov w8, #4 // =0x4 +; V83A-NEXT: stp w8, w9, [sp, #16] +; V83A-NEXT: mov w9, #5 // =0x5 +; V83A-NEXT: mov w8, #6 // =0x6 +; V83A-NEXT: stp w8, w9, [sp, #8] +; V83A-NEXT: add sp, sp, #32 +; V83A-NEXT: retab %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -46,19 +107,46 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" { store i32 4, ptr %4, align 4 store i32 5, ptr %5, align 4 store i32 6, ptr %6, align 4 -; V8A-NOT: hint #29 -; V83A-NOT: autiasp -; V83A-NOT: retaa ret void -; CHECK: .cfi_endproc } define void @c() "sign-return-address"="all" { -; CHECK-LABEL: c: // @c -; V8A: hint #25 -; V83A: paciasp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; V8A-LABEL: c: +; V8A: // %bb.0: +; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: sub sp, sp, #32 +; V8A-NEXT: .cfi_def_cfa_offset 32 +; V8A-NEXT: mov w8, #1 // =0x1 +; V8A-NEXT: mov w9, #2 // =0x2 +; V8A-NEXT: stp w9, w8, [sp, #24] +; V8A-NEXT: mov w9, #3 // =0x3 +; V8A-NEXT: mov w8, #4 // =0x4 +; V8A-NEXT: stp w8, w9, [sp, #16] +; V8A-NEXT: mov w9, #5 // =0x5 +; V8A-NEXT: mov w8, #6 // =0x6 +; V8A-NEXT: stp w8, w9, [sp, #8] +; V8A-NEXT: add sp, sp, #32 +; V8A-NEXT: hint #29 +; V8A-NEXT: ret +; +; 
V83A-LABEL: c: +; V83A: // %bb.0: +; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: sub sp, sp, #32 +; V83A-NEXT: .cfi_def_cfa_offset 32 +; V83A-NEXT: mov w8, #1 // =0x1 +; V83A-NEXT: mov w9, #2 // =0x2 +; V83A-NEXT: stp w9, w8, [sp, #24] +; V83A-NEXT: mov w9, #3 // =0x3 +; V83A-NEXT: mov w8, #4 // =0x4 +; V83A-NEXT: stp w8, w9, [sp, #16] +; V83A-NEXT: mov w9, #5 // =0x5 +; V83A-NEXT: mov w8, #6 // =0x6 +; V83A-NEXT: stp w8, w9, [sp, #8] +; V83A-NEXT: add sp, sp, #32 +; V83A-NEXT: retaa %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -71,11 +159,10 @@ define void @c() "sign-return-address"="all" { store i32 4, ptr %4, align 4 store i32 5, ptr %5, align 4 store i32 6, ptr %6, align 4 -; V8A: hint #29 -; V83A: retaa ret void -; CHECK: .cfi_endproc } ; CHECK-NOT: OUTLINED_FUNCTION_0: ; CHECK-NOT: // -- Begin function +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll index d5ef94e900993c8..a7ea32952f3b78e 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll @@ -10,8 +10,7 @@ define void @a() #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame ; CHECK-NEXT: pacibsp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 @@ -35,8 +34,7 @@ define void @b() #0 { ; CHECK: // %bb.0: ; CHECK-NEXT: .cfi_b_key_frame ; CHECK-NEXT: pacibsp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 @@ -60,8 +58,7 @@ define void @c() #1 { ; CHECK: // %bb.0: ; 
CHECK-NEXT: .cfi_b_key_frame ; CHECK-NEXT: hint #27 -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset +; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NOT: OUTLINED_FUNCTION_ %1 = alloca i32, align 4 %2 = alloca i32, align 4 diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll index 3e361111b545532..da68ea5bf0dbcb9 100644 --- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll +++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner -outliner-leaf-descendants=false \ ; RUN: -verify-machineinstrs %s -o - | FileCheck --check-prefixes CHECK,V8A %s ; RUN: llc -mtriple aarch64 -enable-machine-outliner -outliner-leaf-descendants=false \ @@ -7,15 +8,38 @@ declare i32 @thunk_called_fn(i32, i32, i32, i32) define i32 @a() #0 { -; CHECK-LABEL: a: // @a -; CHECK: // %bb.0: // %entry -; V8A-NEXT: hint #25 -; V83A-NEXT: paciasp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset -; V8A: hint #29 -; V8A-NEXT: ret -; V83A: retaa +; V8A-LABEL: a: +; V8A: // %bb.0: // %entry +; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; V8A-NEXT: .cfi_def_cfa_offset 16 +; V8A-NEXT: .cfi_offset w30, -16 +; V8A-NEXT: mov w0, #1 // =0x1 +; V8A-NEXT: mov w1, #2 // =0x2 +; V8A-NEXT: mov w2, #3 // =0x3 +; V8A-NEXT: mov w3, #4 // =0x4 +; V8A-NEXT: bl thunk_called_fn +; V8A-NEXT: add w0, w0, #8 +; V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; V8A-NEXT: hint #29 +; V8A-NEXT: ret +; +; V83A-LABEL: a: +; V83A: // %bb.0: // %entry +; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; V83A-NEXT: .cfi_def_cfa_offset 16 +; V83A-NEXT: .cfi_offset w30, -16 +; V83A-NEXT: mov w0, #1 // =0x1 +; V83A-NEXT: mov w1, #2 // =0x2 +; V83A-NEXT: mov w2, #3 // =0x3 +; V83A-NEXT: mov w3, #4 // =0x4 +; V83A-NEXT: bl thunk_called_fn +; V83A-NEXT: add w0, w0, #8 +; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; V83A-NEXT: retaa entry: %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) %cx = add i32 %call, 8 @@ -23,15 +47,38 @@ entry: } define i32 @b() #0 { -; CHECK-LABEL: b: // @b -; CHECK: // %bb.0: // %entry -; V8A-NEXT: hint #25 -; V83A-NEXT: paciasp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset -; V8A: hint #29 -; V8A-NEXT: ret -; V83A: retaa +; V8A-LABEL: b: +; V8A: // %bb.0: // %entry +; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; V8A-NEXT: .cfi_def_cfa_offset 16 +; V8A-NEXT: .cfi_offset w30, -16 +; V8A-NEXT: mov w0, #1 // =0x1 +; V8A-NEXT: mov w1, #2 // =0x2 +; V8A-NEXT: mov w2, #3 // =0x3 +; V8A-NEXT: mov w3, #4 // =0x4 +; V8A-NEXT: bl thunk_called_fn +; V8A-NEXT: add w0, w0, #88 +; V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; V8A-NEXT: hint #29 +; V8A-NEXT: ret +; +; V83A-LABEL: b: +; V83A: // %bb.0: // %entry +; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; V83A-NEXT: .cfi_def_cfa_offset 16 +; V83A-NEXT: .cfi_offset w30, -16 +; V83A-NEXT: mov w0, #1 // =0x1 +; V83A-NEXT: mov w1, #2 // =0x2 +; V83A-NEXT: mov w2, #3 // =0x3 +; V83A-NEXT: mov w3, #4 // =0x4 +; V83A-NEXT: bl thunk_called_fn +; V83A-NEXT: add w0, w0, #88 +; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; V83A-NEXT: retaa entry: %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) %cx = add i32 %call, 88 @@ -39,15 +86,40 @@ entry: } define hidden i32 @c(ptr %fptr) #0 { -; CHECK-LABEL: c: // @c -; CHECK: // %bb.0: // %entry -; V8A-NEXT: hint #25 -; V83A-NEXT: paciasp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset -; V8A: hint #29 -; V8A-NEXT: ret -; V83A: retaa +; V8A-LABEL: c: +; V8A: // %bb.0: // %entry +; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; V8A-NEXT: .cfi_def_cfa_offset 16 +; V8A-NEXT: .cfi_offset w30, -16 +; V8A-NEXT: mov x8, x0 +; V8A-NEXT: mov w0, #1 // =0x1 +; V8A-NEXT: mov w1, #2 // =0x2 +; V8A-NEXT: mov w2, #3 // =0x3 +; V8A-NEXT: mov w3, #4 // =0x4 +; V8A-NEXT: blr x8 +; V8A-NEXT: add w0, w0, #8 +; V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; V8A-NEXT: hint #29 +; V8A-NEXT: ret +; +; V83A-LABEL: c: +; V83A: // %bb.0: // %entry +; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; V83A-NEXT: .cfi_def_cfa_offset 16 +; V83A-NEXT: .cfi_offset w30, -16 +; V83A-NEXT: mov x8, x0 +; V83A-NEXT: mov w0, #1 // =0x1 +; V83A-NEXT: mov w1, #2 // =0x2 +; V83A-NEXT: mov w2, #3 // =0x3 +; V83A-NEXT: mov w3, #4 // =0x4 +; V83A-NEXT: blr x8 +; V83A-NEXT: add w0, w0, #8 +; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; V83A-NEXT: retaa entry: %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) %add = add nsw i32 %call, 8 @@ -55,15 +127,40 @@ entry: } define hidden i32 @d(ptr %fptr) #0 { -; CHECK-LABEL: d: // @d -; CHECK: // %bb.0: // %entry -; V8A-NEXT: hint #25 -; V83A-NEXT: paciasp -; CHECK: .cfi_negate_ra_state -; CHECK-NEXT: .cfi_def_cfa_offset -; V8A: hint #29 -; V8A-NEXT: ret -; V83A: retaa +; V8A-LABEL: d: +; V8A: // %bb.0: // %entry +; V8A-NEXT: hint #25 +; V8A-NEXT: .cfi_negate_ra_state +; V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; V8A-NEXT: .cfi_def_cfa_offset 16 +; V8A-NEXT: .cfi_offset w30, -16 +; V8A-NEXT: mov x8, x0 +; V8A-NEXT: mov w0, #1 // =0x1 +; V8A-NEXT: mov w1, #2 // =0x2 +; V8A-NEXT: mov w2, #3 // =0x3 +; V8A-NEXT: mov w3, #4 // =0x4 +; V8A-NEXT: blr x8 +; V8A-NEXT: add w0, w0, #88 +; V8A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; V8A-NEXT: hint #29 +; V8A-NEXT: ret +; +; V83A-LABEL: d: +; V83A: // %bb.0: // %entry +; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; V83A-NEXT: .cfi_def_cfa_offset 16 +; V83A-NEXT: .cfi_offset w30, -16 +; V83A-NEXT: mov x8, x0 +; V83A-NEXT: mov w0, #1 // =0x1 +; V83A-NEXT: mov w1, #2 // =0x2 +; V83A-NEXT: mov w2, #3 // =0x3 +; V83A-NEXT: mov w3, #4 // =0x4 +; V83A-NEXT: blr x8 +; V83A-NEXT: add w0, w0, #88 +; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; V83A-NEXT: retaa entry: %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4) %add = add nsw i32 %call, 88 diff --git a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll index 0969ec246399fe5..373c4969a9405c3 100644 --- a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll +++ b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll @@ -35,8 +35,7 @@ entry: ;; CHECK-LABEL: __llvm_gcov_writeout: ;; CHECK: .cfi_b_key_frame ;; CHECK-NEXT: pacibsp -;; CHECK: .cfi_negate_ra_state -;; CHECK-NEXT: .cfi_def_cfa_offset +;; CHECK-NEXT: .cfi_negate_ra_state define internal void @__llvm_gcov_reset() unnamed_addr #2 { entry: diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll index eb224bbbd601fbc..4d4b7c215b978ae 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll @@ -10,8 +10,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 { ; CHECK-V8A-LABEL: _Z3fooi: ; CHECK-V8A: // %bb.0: // %entry ; CHECK-V8A-NEXT: hint #25 -; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_negate_ra_state +; CHECK-V8A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V8A-NEXT: .cfi_offset w30, -16 ; CHECK-V8A-NEXT: str w0, [sp, #8] @@ -28,8 +28,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 { ; CHECK-V83A-LABEL: _Z3fooi: ; CHECK-V83A: // %bb.0: // %entry ; CHECK-V83A-NEXT: paciasp -; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_negate_ra_state +; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V83A-NEXT: .cfi_offset w30, -16 ; CHECK-V83A-NEXT: str w0, [sp, #8] @@ -144,8 +144,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) { ; CHECK-V8A-LABEL: baz_sync: ; CHECK-V8A: // %bb.0: // %entry ; CHECK-V8A-NEXT: hint #25 -; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_negate_ra_state +; CHECK-V8A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V8A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V8A-NEXT: .cfi_offset w30, -16 ; CHECK-V8A-NEXT: cbz w0, .LBB2_2 @@ -165,8 +165,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) { ; CHECK-V83A-LABEL: baz_sync: ; CHECK-V83A: // %bb.0: // %entry ; CHECK-V83A-NEXT: paciasp -; CHECK-V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_negate_ra_state +; CHECK-V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-V83A-NEXT: .cfi_def_cfa_offset 16 ; CHECK-V83A-NEXT: .cfi_offset w30, -16 ; CHECK-V83A-NEXT: cbz w0, .LBB2_2 @@ -229,7 +229,6 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP: DW_CFA_restore_state: ; CHECK-DUMP: DW_CFA_AARCH64_negate_ra_state: -; CHECK-DUMP: CFA=WSP{{$}} ;; First DW_CFA_AARCH64_negate_ra_state: ; CHECK-DUMP: reg34=1 ;; Second DW_CFA_AARCH64_negate_ra_state: @@ -238,7 +237,7 @@ attributes #0 = { "sign-return-address"="all" } ; CHECK-DUMP: reg34=1 ;; Third DW_CFA_AARCH64_negate_ra_state: ; CHECK-DUMP: reg34=0 -; CHECK-DUMP-NOT: reg34= +; CHECK-DUMP-NOT: reg34=1 ; baz_sync ; CHECK-DUMP-LABEL: FDE diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll b/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll index 3d133e02106bc8b..fa689d2b9d7fddc 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll @@ -62,8 +62,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp0: ; COMPAT-NEXT: hint #25 -; COMPAT-NEXT: .cfi_negate_ra_state -; COMPAT-NEXT: adr x16, .Ltmp0 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc +; COMPAT-NEXT: adrp x16, .Ltmp0 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp0 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret @@ -73,8 +74,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp0: ; V83A-NEXT: paciasp -; V83A-NEXT: .cfi_negate_ra_state -; V83A-NEXT: adr x16, .Ltmp0 +; V83A-NEXT: .cfi_negate_ra_state_with_pc +; V83A-NEXT: adrp x16, .Ltmp0 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp0 ; V83A-NEXT: hint #39 ; V83A-NEXT: retaa ; @@ -82,7 +84,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr ; PAUTHLR: // %bb.0: ; PAUTHLR-NEXT: .Ltmp0: ; PAUTHLR-NEXT: paciasppc -; PAUTHLR-NEXT: .cfi_negate_ra_state +; 
PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc +; PAUTHLR-NEXT: adrp x16, .Ltmp0 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp0 ; PAUTHLR-NEXT: retaasppc .Ltmp0 ret i32 %x } @@ -93,15 +97,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp1: ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 ; COMPAT-NEXT: //APP ; COMPAT-NEXT: mov x30, x0 ; COMPAT-NEXT: //NO_APP ; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; COMPAT-NEXT: adr x16, .Ltmp1 +; COMPAT-NEXT: adrp x16, .Ltmp1 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp1 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret @@ -111,15 +116,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp1: ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: //APP ; V83A-NEXT: mov x30, x0 ; V83A-NEXT: //NO_APP ; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; V83A-NEXT: adr x16, .Ltmp1 +; V83A-NEXT: adrp x16, .Ltmp1 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp1 ; V83A-NEXT: hint #39 ; V83A-NEXT: retaa ; @@ -127,14 +133,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a ; PAUTHLR: // %bb.0: ; PAUTHLR-NEXT: .Ltmp1: ; PAUTHLR-NEXT: paciasppc +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc ; PAUTHLR-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; PAUTHLR-NEXT: .cfi_negate_ra_state ; PAUTHLR-NEXT: .cfi_def_cfa_offset 16 ; PAUTHLR-NEXT: .cfi_offset w30, -16 ; PAUTHLR-NEXT: //APP ; PAUTHLR-NEXT: mov x30, x0 ; PAUTHLR-NEXT: //NO_APP ; PAUTHLR-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; PAUTHLR-NEXT: adrp x16, .Ltmp1 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp1 ; PAUTHLR-NEXT: retaasppc .Ltmp1 call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1 ret i64 %x @@ -148,13 +156,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return- ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp2: ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 ; COMPAT-NEXT: bl foo ; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; COMPAT-NEXT: adr x16, .Ltmp2 +; COMPAT-NEXT: adrp x16, .Ltmp2 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp2 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret @@ -164,13 +173,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return- ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp2: ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: bl foo ; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; V83A-NEXT: adr x16, .Ltmp2 +; V83A-NEXT: adrp x16, .Ltmp2 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp2 ; V83A-NEXT: hint #39 ; V83A-NEXT: retaa ; @@ -178,12 +188,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return- ; PAUTHLR: // %bb.0: ; PAUTHLR-NEXT: .Ltmp2: ; PAUTHLR-NEXT: paciasppc +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc ; PAUTHLR-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; PAUTHLR-NEXT: .cfi_negate_ra_state ; PAUTHLR-NEXT: .cfi_def_cfa_offset 16 ; PAUTHLR-NEXT: .cfi_offset w30, -16 ; PAUTHLR-NEXT: bl foo ; PAUTHLR-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; PAUTHLR-NEXT: adrp x16, .Ltmp2 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp2 ; PAUTHLR-NEXT: retaasppc .Ltmp2 %call = call i32 @foo(i32 %x) ret i32 %call @@ -195,13 +207,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp3: ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 ; COMPAT-NEXT: bl foo ; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; COMPAT-NEXT: adr x16, .Ltmp3 +; COMPAT-NEXT: adrp x16, .Ltmp3 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp3 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret @@ -211,13 +224,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp3: ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: bl foo ; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; V83A-NEXT: adr x16, .Ltmp3 +; V83A-NEXT: adrp x16, .Ltmp3 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp3 ; V83A-NEXT: hint #39 ; V83A-NEXT: retaa ; @@ -225,12 +239,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re ; PAUTHLR: // %bb.0: ; PAUTHLR-NEXT: .Ltmp3: ; PAUTHLR-NEXT: paciasppc +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc ; PAUTHLR-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; PAUTHLR-NEXT: .cfi_negate_ra_state ; PAUTHLR-NEXT: .cfi_def_cfa_offset 16 ; PAUTHLR-NEXT: .cfi_offset w30, -16 ; PAUTHLR-NEXT: bl foo ; PAUTHLR-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; PAUTHLR-NEXT: adrp x16, .Ltmp3 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp3 ; PAUTHLR-NEXT: retaasppc .Ltmp3 %call = call i32 @foo(i32 %x) ret i32 %call @@ -245,13 +261,14 @@ define i32 @non_leaf_scs(i32 %x) "branch-protection-pauth-lr" "sign-return-addre ; CHECK-NEXT: hint #39 ; CHECK-NEXT: .Ltmp4: ; CHECK-NEXT: paciasp +; CHECK-NEXT: .cfi_negate_ra_state_with_pc ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_negate_ra_state ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl foo ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: adr x16, .Ltmp4 +; CHECK-NEXT: adrp x16, .Ltmp4 +; CHECK-NEXT: add x16, x16, :lo12:.Ltmp4 ; CHECK-NEXT: hint #39 ; CHECK-NEXT: autiasp ; CHECK-NEXT: ldr x30, [x18, #-8]! @@ -263,12 +280,14 @@ define i32 @non_leaf_scs(i32 %x) "branch-protection-pauth-lr" "sign-return-addre ; PAUTHLR-NEXT: .cfi_escape 0x16, 0x12, 0x02, 0x82, 0x78 // ; PAUTHLR-NEXT: .Ltmp4: ; PAUTHLR-NEXT: paciasppc +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc ; PAUTHLR-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; PAUTHLR-NEXT: .cfi_negate_ra_state ; PAUTHLR-NEXT: .cfi_def_cfa_offset 16 ; PAUTHLR-NEXT: .cfi_offset w30, -16 ; PAUTHLR-NEXT: bl foo ; PAUTHLR-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; PAUTHLR-NEXT: adrp x16, .Ltmp4 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp4 ; PAUTHLR-NEXT: autiasppc .Ltmp4 ; PAUTHLR-NEXT: ldr x30, [x18, #-8]! 
; PAUTHLR-NEXT: ret @@ -282,8 +301,9 @@ define i32 @leaf_sign_all_v83(i32 %x) "branch-protection-pauth-lr" "sign-return- ; CHECK-NEXT: hint #39 ; CHECK-NEXT: .Ltmp5: ; CHECK-NEXT: paciasp -; CHECK-NEXT: .cfi_negate_ra_state -; CHECK-NEXT: adr x16, .Ltmp5 +; CHECK-NEXT: .cfi_negate_ra_state_with_pc +; CHECK-NEXT: adrp x16, .Ltmp5 +; CHECK-NEXT: add x16, x16, :lo12:.Ltmp5 ; CHECK-NEXT: hint #39 ; CHECK-NEXT: retaa ; @@ -291,7 +311,9 @@ define i32 @leaf_sign_all_v83(i32 %x) "branch-protection-pauth-lr" "sign-return- ; PAUTHLR: // %bb.0: ; PAUTHLR-NEXT: .Ltmp5: ; PAUTHLR-NEXT: paciasppc -; PAUTHLR-NEXT: .cfi_negate_ra_state +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc +; PAUTHLR-NEXT: adrp x16, .Ltmp5 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp5 ; PAUTHLR-NEXT: retaasppc .Ltmp5 ret i32 %x } @@ -304,15 +326,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr" ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp6: ; COMPAT-NEXT: hint #25 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; COMPAT-NEXT: .cfi_negate_ra_state ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 ; COMPAT-NEXT: //APP ; COMPAT-NEXT: mov x30, x0 ; COMPAT-NEXT: //NO_APP ; COMPAT-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; COMPAT-NEXT: adr x16, .Ltmp6 +; COMPAT-NEXT: adrp x16, .Ltmp6 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp6 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: b bar @@ -322,15 +345,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr" ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp6: ; V83A-NEXT: paciasp +; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; V83A-NEXT: .cfi_negate_ra_state ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: //APP ; V83A-NEXT: mov x30, x0 ; V83A-NEXT: //NO_APP ; V83A-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; V83A-NEXT: adr x16, .Ltmp6 +; V83A-NEXT: adrp x16, .Ltmp6 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp6 ; V83A-NEXT: hint #39 ; V83A-NEXT: autiasp ; V83A-NEXT: b bar @@ -339,14 +363,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr" ; PAUTHLR: // %bb.0: ; PAUTHLR-NEXT: .Ltmp6: ; PAUTHLR-NEXT: paciasppc +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc ; PAUTHLR-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; PAUTHLR-NEXT: .cfi_negate_ra_state ; PAUTHLR-NEXT: .cfi_def_cfa_offset 16 ; PAUTHLR-NEXT: .cfi_offset w30, -16 ; PAUTHLR-NEXT: //APP ; PAUTHLR-NEXT: mov x30, x0 ; PAUTHLR-NEXT: //NO_APP ; PAUTHLR-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; PAUTHLR-NEXT: adrp x16, .Ltmp6 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp6 ; PAUTHLR-NEXT: autiasppc .Ltmp6 ; PAUTHLR-NEXT: b bar call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1 @@ -360,8 +386,9 @@ define i32 @leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp7: ; COMPAT-NEXT: hint #25 -; COMPAT-NEXT: .cfi_negate_ra_state -; COMPAT-NEXT: adr x16, .Ltmp7 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc +; COMPAT-NEXT: adrp x16, .Ltmp7 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp7 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret @@ -371,8 +398,9 @@ define i32 @leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp7: ; V83A-NEXT: paciasp -; V83A-NEXT: .cfi_negate_ra_state -; V83A-NEXT: adr x16, .Ltmp7 +; V83A-NEXT: .cfi_negate_ra_state_with_pc +; V83A-NEXT: adrp x16, .Ltmp7 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp7 ; V83A-NEXT: hint #39 ; V83A-NEXT: retaa ; @@ -380,7 +408,9 @@ define i32 
@leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur ; PAUTHLR: // %bb.0: ; PAUTHLR-NEXT: .Ltmp7: ; PAUTHLR-NEXT: paciasppc -; PAUTHLR-NEXT: .cfi_negate_ra_state +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc +; PAUTHLR-NEXT: adrp x16, .Ltmp7 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp7 ; PAUTHLR-NEXT: retaasppc .Ltmp7 ret i32 %x } @@ -392,8 +422,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp8: ; COMPAT-NEXT: hint #27 -; COMPAT-NEXT: .cfi_negate_ra_state -; COMPAT-NEXT: adr x16, .Ltmp8 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc +; COMPAT-NEXT: adrp x16, .Ltmp8 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp8 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #31 ; COMPAT-NEXT: ret @@ -404,8 +435,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp8: ; V83A-NEXT: pacibsp -; V83A-NEXT: .cfi_negate_ra_state -; V83A-NEXT: adr x16, .Ltmp8 +; V83A-NEXT: .cfi_negate_ra_state_with_pc +; V83A-NEXT: adrp x16, .Ltmp8 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp8 ; V83A-NEXT: hint #39 ; V83A-NEXT: retab ; @@ -414,7 +446,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur ; PAUTHLR-NEXT: .cfi_b_key_frame ; PAUTHLR-NEXT: .Ltmp8: ; PAUTHLR-NEXT: pacibsppc -; PAUTHLR-NEXT: .cfi_negate_ra_state +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc +; PAUTHLR-NEXT: adrp x16, .Ltmp8 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp8 ; PAUTHLR-NEXT: retabsppc .Ltmp8 ret i32 %x } @@ -426,8 +460,9 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "branch-protection-pauth-lr" "sign-r ; CHECK-NEXT: hint #39 ; CHECK-NEXT: .Ltmp9: ; CHECK-NEXT: pacibsp -; CHECK-NEXT: .cfi_negate_ra_state -; CHECK-NEXT: adr x16, .Ltmp9 +; CHECK-NEXT: .cfi_negate_ra_state_with_pc +; CHECK-NEXT: adrp x16, .Ltmp9 +; CHECK-NEXT: add x16, x16, :lo12:.Ltmp9 ; CHECK-NEXT: hint #39 ; CHECK-NEXT: retab ; @@ -436,7 +471,9 @@ define i32 
@leaf_sign_all_v83_b_key(i32 %x) "branch-protection-pauth-lr" "sign-r ; PAUTHLR-NEXT: .cfi_b_key_frame ; PAUTHLR-NEXT: .Ltmp9: ; PAUTHLR-NEXT: pacibsppc -; PAUTHLR-NEXT: .cfi_negate_ra_state +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc +; PAUTHLR-NEXT: adrp x16, .Ltmp9 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp9 ; PAUTHLR-NEXT: retabsppc .Ltmp9 ret i32 %x } @@ -449,8 +486,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp10: ; COMPAT-NEXT: hint #25 -; COMPAT-NEXT: .cfi_negate_ra_state -; COMPAT-NEXT: adr x16, .Ltmp10 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc +; COMPAT-NEXT: adrp x16, .Ltmp10 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp10 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #29 ; COMPAT-NEXT: ret @@ -461,8 +499,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp10: ; V83A-NEXT: paciasp -; V83A-NEXT: .cfi_negate_ra_state -; V83A-NEXT: adr x16, .Ltmp10 +; V83A-NEXT: .cfi_negate_ra_state_with_pc +; V83A-NEXT: adrp x16, .Ltmp10 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp10 ; V83A-NEXT: hint #39 ; V83A-NEXT: retaa ; @@ -471,7 +510,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r ; PAUTHLR-NEXT: bti c ; PAUTHLR-NEXT: .Ltmp10: ; PAUTHLR-NEXT: paciasppc -; PAUTHLR-NEXT: .cfi_negate_ra_state +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc +; PAUTHLR-NEXT: adrp x16, .Ltmp10 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp10 ; PAUTHLR-NEXT: retaasppc .Ltmp10 ret i32 %x } @@ -485,8 +526,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: .Ltmp11: ; COMPAT-NEXT: hint #27 -; COMPAT-NEXT: .cfi_negate_ra_state -; COMPAT-NEXT: adr x16, .Ltmp11 +; COMPAT-NEXT: .cfi_negate_ra_state_with_pc +; COMPAT-NEXT: adrp x16, .Ltmp11 +; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp11 ; COMPAT-NEXT: hint #39 ; COMPAT-NEXT: hint #31 ; COMPAT-NEXT: 
ret @@ -498,8 +540,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r ; V83A-NEXT: hint #39 ; V83A-NEXT: .Ltmp11: ; V83A-NEXT: pacibsp -; V83A-NEXT: .cfi_negate_ra_state -; V83A-NEXT: adr x16, .Ltmp11 +; V83A-NEXT: .cfi_negate_ra_state_with_pc +; V83A-NEXT: adrp x16, .Ltmp11 +; V83A-NEXT: add x16, x16, :lo12:.Ltmp11 ; V83A-NEXT: hint #39 ; V83A-NEXT: retab ; @@ -509,7 +552,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r ; PAUTHLR-NEXT: .cfi_b_key_frame ; PAUTHLR-NEXT: .Ltmp11: ; PAUTHLR-NEXT: pacibsppc -; PAUTHLR-NEXT: .cfi_negate_ra_state +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc +; PAUTHLR-NEXT: adrp x16, .Ltmp11 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp11 ; PAUTHLR-NEXT: retabsppc .Ltmp11 ret i32 %x } @@ -523,8 +568,9 @@ define i32 @leaf_sign_all_v83_b_key_bti(i32 %x) "branch-protection-pauth-lr" "si ; CHECK-NEXT: hint #39 ; CHECK-NEXT: .Ltmp12: ; CHECK-NEXT: pacibsp -; CHECK-NEXT: .cfi_negate_ra_state -; CHECK-NEXT: adr x16, .Ltmp12 +; CHECK-NEXT: .cfi_negate_ra_state_with_pc +; CHECK-NEXT: adrp x16, .Ltmp12 +; CHECK-NEXT: add x16, x16, :lo12:.Ltmp12 ; CHECK-NEXT: hint #39 ; CHECK-NEXT: retab ; @@ -534,7 +580,9 @@ define i32 @leaf_sign_all_v83_b_key_bti(i32 %x) "branch-protection-pauth-lr" "si ; PAUTHLR-NEXT: .cfi_b_key_frame ; PAUTHLR-NEXT: .Ltmp12: ; PAUTHLR-NEXT: pacibsppc -; PAUTHLR-NEXT: .cfi_negate_ra_state +; PAUTHLR-NEXT: .cfi_negate_ra_state_with_pc +; PAUTHLR-NEXT: adrp x16, .Ltmp12 +; PAUTHLR-NEXT: add x16, x16, :lo12:.Ltmp12 ; PAUTHLR-NEXT: retabsppc .Ltmp12 ret i32 %x } diff --git a/llvm/test/CodeGen/AArch64/sign-return-address.ll b/llvm/test/CodeGen/AArch64/sign-return-address.ll index c33463eb96a687c..dafe0d71ceb5f76 100644 --- a/llvm/test/CodeGen/AArch64/sign-return-address.ll +++ b/llvm/test/CodeGen/AArch64/sign-return-address.ll @@ -46,8 +46,8 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf" { ; COMPAT-LABEL: leaf_clobbers_lr: ; COMPAT: // 
%bb.0: ; COMPAT-NEXT: hint #25 -; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_negate_ra_state +; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 ; COMPAT-NEXT: //APP @@ -60,8 +60,8 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf" { ; V83A-LABEL: leaf_clobbers_lr: ; V83A: // %bb.0: ; V83A-NEXT: paciasp -; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: //APP @@ -79,8 +79,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" { ; COMPAT-LABEL: non_leaf_sign_all: ; COMPAT: // %bb.0: ; COMPAT-NEXT: hint #25 -; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_negate_ra_state +; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 ; COMPAT-NEXT: bl foo @@ -91,8 +91,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" { ; V83A-LABEL: non_leaf_sign_all: ; V83A: // %bb.0: ; V83A-NEXT: paciasp -; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: bl foo @@ -106,8 +106,8 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" { ; COMPAT-LABEL: non_leaf_sign_non_leaf: ; COMPAT: // %bb.0: ; COMPAT-NEXT: hint #25 -; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_negate_ra_state +; COMPAT-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 ; COMPAT-NEXT: bl foo @@ -118,8 +118,8 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf" { ; V83A-LABEL: non_leaf_sign_non_leaf: ; V83A: // %bb.0: ; V83A-NEXT: paciasp -; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: bl foo @@ -136,8 +136,8 @@ define i32 @non_leaf_scs(i32 %x) "sign-return-address"="non-leaf" shadowcallstac ; CHECK-NEXT: str x30, [x18], #8 ; CHECK-NEXT: .cfi_escape 0x16, 0x12, 0x02, 0x82, 0x78 // ; CHECK-NEXT: paciasp -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_negate_ra_state +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl foo @@ -164,8 +164,8 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" { ; COMPAT-LABEL: spill_lr_and_tail_call: ; COMPAT: // %bb.0: ; COMPAT-NEXT: hint #25 -; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_negate_ra_state +; COMPAT-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; COMPAT-NEXT: .cfi_def_cfa_offset 16 ; COMPAT-NEXT: .cfi_offset w30, -16 ; COMPAT-NEXT: //APP @@ -178,8 +178,8 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" { ; V83A-LABEL: spill_lr_and_tail_call: ; V83A: // %bb.0: ; V83A-NEXT: paciasp -; V83A-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; V83A-NEXT: .cfi_negate_ra_state +; V83A-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; V83A-NEXT: .cfi_def_cfa_offset 16 ; V83A-NEXT: .cfi_offset w30, -16 ; V83A-NEXT: //APP diff --git a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir index a63bb8452ebbe14..d2b063a057139bc 100644 --- a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir +++ b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir @@ -12,6 +12,11 @@ entry: ret i32 2 } + + define dso_local i32 @foobar() "sign-return-address"="all" "branch-protection-pauth-lr"="true" { + entry: + ret i32 2 + } ... --- #CHECK: foo @@ -46,3 +51,21 @@ body: | RET_ReallyLR implicit killed $w0 ... +--- +#CHECK: foobar +name: foobar +alignment: 4 +tracksRegLiveness: true +frameInfo: + maxCallFrameSize: 0 +#CHECK: frame-setup PACM +#CHECK: frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp, pre-instr-symbol +#CHECK: frame-setup CFI_INSTRUCTION negate_ra_sign_state_with_pc +#CHECK: frame-destroy PACM +#CHECK: frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp +body: | + bb.0.entry: + $w0 = MOVi32imm 2 + RET_ReallyLR implicit killed $w0 + +... 
diff --git a/llvm/test/MC/AArch64/directives-case_insensitive.s b/llvm/test/MC/AArch64/directives-case_insensitive.s index be92e00cfad11a2..35a90a1bffea8d0 100644 --- a/llvm/test/MC/AArch64/directives-case_insensitive.s +++ b/llvm/test/MC/AArch64/directives-case_insensitive.s @@ -32,10 +32,12 @@ fred .REQ x5 .CFI_STARTPROC .CFI_NEGATE_RA_STATE +.CFI_NEGATE_RA_STATE_WITH_PC .CFI_B_KEY_FRAME .CFI_ENDPROC // CHECK: .cfi_startproc // CHECK: .cfi_negate_ra_state +// CHECK: .cfi_negate_ra_state_with_pc // CHECK: .cfi_b_key_frame // CHECK: .cfi_endproc diff --git a/llvm/test/MC/AArch64/negate_ra_state_with_pc.s b/llvm/test/MC/AArch64/negate_ra_state_with_pc.s new file mode 100644 index 000000000000000..44b8ab2df9a908a --- /dev/null +++ b/llvm/test/MC/AArch64/negate_ra_state_with_pc.s @@ -0,0 +1,7 @@ +//RUN: llvm-mc -triple=aarch64-arm-none-eabi -o - %s | FileCheck %s + +// CHECK: .cfi_negate_ra_state_with_pc +foo: + .cfi_startproc + .cfi_negate_ra_state_with_pc + .cfi_endproc diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp index 17fb18fc6b4d24b..2be656547c92e01 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp @@ -174,6 +174,7 @@ TEST(DWARFDebugFrame, InvalidCFIOpcodesTest) { dwarf::DW_CFA_MIPS_advance_loc8, dwarf::DW_CFA_GNU_window_save, dwarf::DW_CFA_AARCH64_negate_ra_state, + dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc, dwarf::DW_CFA_GNU_args_size}; dwarf::CIE TestCIE = createCIE(/*IsDWARF64=*/false, From d7ca703eab7997814de425eaa4fd888563d78831 Mon Sep 17 00:00:00 2001 From: Thomas Fransham Date: Mon, 28 Oct 2024 08:47:40 +0000 Subject: [PATCH 140/425] Add explicit symbol visibility macros to InstrProfData.inc (#110732) Add explicit symbol visibility macros to InstrProfData.inc Annotating these symbols will fix missing symbols for InstrProfTest when using shared library builds on windows with explicit visibility 
macros enabled. Add an empty fallback definition for the LLVM_ABI macro so the code works in compiler-rt. This is part of the work to enable LLVM_BUILD_LLVM_DYLIB and plugins on Windows. ``` llvm\lld-link : error : undefined symbol: public: void ValueProfData::deserializeTo(InstrProfRecord&, InstrProfSymtab*) >>> referenced by unittests\ProfileData\InstrProfTest.cpp:1372 void ValueProfileReadWriteTest_value_prof_data_read_write_Test::TestBody() ``` --- compiler-rt/include/profile/InstrProfData.inc | 13 +++++++++++-- llvm/include/llvm/ProfileData/InstrProfData.inc | 13 +++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index c66b0465a0b5485..08ecaf0ed9fa5b1 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -62,6 +62,15 @@ #define INSTR_PROF_VISIBILITY #endif +/* This include is needed for symbol visibility macros used on + * ValueProfRecord\ValueProfData so there functions are exported from the + * LLVM shared library on windows. */ +#ifdef __cplusplus +#include "llvm/Support/Compiler.h" +#else +#define LLVM_ABI +#endif + // clang-format off:consider re-enabling clang-format if auto-formatted C macros // are readable (e.g., after `issue #82426` is fixed) /* INSTR_PROF_DATA start. */ @@ -373,7 +382,7 @@ INSTR_PROF_SECT_ENTRY(IPSK_covinit, \ * This is the header of the data structure that defines the on-disk * layout of the value profile data of a particular kind for one function. */ -typedef struct ValueProfRecord { +typedef struct LLVM_ABI ValueProfRecord { /* The kind of the value profile record. */ uint32_t Kind; /* @@ -423,7 +432,7 @@ typedef struct ValueProfRecord { * Per-function header/control data structure for value profiling * data in indexed format. */ -typedef struct ValueProfData { +typedef struct LLVM_ABI ValueProfData { /* * Total size in bytes including this field.
It must be a multiple * of sizeof(uint64_t). diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index c66b0465a0b5485..08ecaf0ed9fa5b1 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -62,6 +62,15 @@ #define INSTR_PROF_VISIBILITY #endif +/* This include is needed for symbol visibility macros used on + * ValueProfRecord\ValueProfData so there functions are exported from the + * LLVM shared library on windows. */ +#ifdef __cplusplus +#include "llvm/Support/Compiler.h" +#else +#define LLVM_ABI +#endif + // clang-format off:consider re-enabling clang-format if auto-formatted C macros // are readable (e.g., after `issue #82426` is fixed) /* INSTR_PROF_DATA start. */ @@ -373,7 +382,7 @@ INSTR_PROF_SECT_ENTRY(IPSK_covinit, \ * This is the header of the data structure that defines the on-disk * layout of the value profile data of a particular kind for one function. */ -typedef struct ValueProfRecord { +typedef struct LLVM_ABI ValueProfRecord { /* The kind of the value profile record. */ uint32_t Kind; /* @@ -423,7 +432,7 @@ typedef struct ValueProfRecord { * Per-function header/control data structure for value profiling * data in indexed format. */ -typedef struct ValueProfData { +typedef struct LLVM_ABI ValueProfData { /* * Total size in bytes including this field. It must be a multiple * of sizeof(uint64_t). From 14171b0b13534e10ac57d13d072970616e17391b Mon Sep 17 00:00:00 2001 From: Thomas Fransham Date: Mon, 28 Oct 2024 08:52:22 +0000 Subject: [PATCH 141/425] [Remarks] Fix missing include in HotnessThresholdParser for Expected (#113755) The use of Expected<> requires llvm/Support/Error.h to be included. 
--- llvm/include/llvm/Remarks/HotnessThresholdParser.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/Remarks/HotnessThresholdParser.h b/llvm/include/llvm/Remarks/HotnessThresholdParser.h index 4cd0d2dff2fe640..374992de59878a5 100644 --- a/llvm/include/llvm/Remarks/HotnessThresholdParser.h +++ b/llvm/include/llvm/Remarks/HotnessThresholdParser.h @@ -16,6 +16,7 @@ #define LLVM_REMARKS_HOTNESSTHRESHOLDPARSER_H #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" #include namespace llvm { From 3d6923dbac16741736a841abee05f35bf7b0379a Mon Sep 17 00:00:00 2001 From: vabridgers <58314289+vabridgers@users.noreply.github.com> Date: Mon, 28 Oct 2024 03:53:36 -0500 Subject: [PATCH 142/425] RFC: [clang-tidy] [analyzer] Move nondeterministic pointer usage check to tidy (#110471) This change moves the `alpha.nondeterministic.PointerSorting` and `alpha.nondeterministic.PointerIteration` static analyzer checkers to a single `clang-tidy` check. Those checkers were implemented as simple `clang-tidy` check-like code, wrapped in the static analyzer framework. The documentation was updated to describe what the checks can and cannot do, and testing was completed on a broad set of open-source projects. 
Co-authored-by: Vince Bridgers --- .../bugprone/BugproneTidyModule.cpp | 3 + .../clang-tidy/bugprone/CMakeLists.txt | 1 + ...eterministicPointerIterationOrderCheck.cpp | 79 +++++++++ ...ndeterministicPointerIterationOrderCheck.h | 39 +++++ clang-tools-extra/docs/ReleaseNotes.rst | 6 + ...ndeterministic-pointer-iteration-order.rst | 44 +++++ .../docs/clang-tidy/checks/list.rst | 1 + .../system-header-simulator/sim_algorithm | 31 ++++ .../system-header-simulator/sim_c++config.h | 11 ++ .../sim_initializer_list | 39 +++++ .../system-header-simulator/sim_iterator_base | 22 +++ .../Inputs/system-header-simulator/sim_map | 34 ++++ .../Inputs/system-header-simulator/sim_set | 44 +++++ .../system-header-simulator/sim_stl_pair | 32 ++++ .../system-header-simulator/sim_type_traits | 19 +++ .../system-header-simulator/sim_unordered_map | 33 ++++ .../system-header-simulator/sim_unordered_set | 35 ++++ .../Inputs/system-header-simulator/sim_vector | 150 ++++++++++++++++++ ...ndeterministic-pointer-iteration-order.cpp | 84 ++++++++++ clang/docs/ReleaseNotes.rst | 6 + clang/docs/analyzer/checkers.rst | 31 ---- .../clang/StaticAnalyzer/Checkers/Checkers.td | 18 --- .../StaticAnalyzer/Checkers/CMakeLists.txt | 2 - .../Checkers/PointerIterationChecker.cpp | 101 ------------ .../Checkers/PointerSortingChecker.cpp | 115 -------------- clang/test/Analysis/ptr-iter.cpp | 28 ---- clang/test/Analysis/ptr-sort.cpp | 36 ----- 27 files changed, 713 insertions(+), 331 deletions(-) create mode 100644 clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_algorithm create mode 100644 
clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_c++config.h create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_initializer_list create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_iterator_base create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_map create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_set create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_stl_pair create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_type_traits create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_map create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_set create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_vector create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/nondeterministic-pointer-iteration-order.cpp delete mode 100644 clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp delete mode 100644 clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp delete mode 100644 clang/test/Analysis/ptr-iter.cpp delete mode 100644 clang/test/Analysis/ptr-sort.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp index 9120c4b6c0d9ae9..33ac65e715ce811 100644 --- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp @@ -49,6 +49,7 @@ #include "MultipleStatementMacroCheck.h" #include "NoEscapeCheck.h" #include "NonZeroEnumToBoolConversionCheck.h" +#include 
"NondeterministicPointerIterationOrderCheck.h" #include "NotNullTerminatedResultCheck.h" #include "OptionalValueConversionCheck.h" #include "ParentVirtualCallCheck.h" @@ -174,6 +175,8 @@ class BugproneModule : public ClangTidyModule { "bugprone-multiple-new-in-one-expression"); CheckFactories.registerCheck( "bugprone-multiple-statement-macro"); + CheckFactories.registerCheck( + "bugprone-nondeterministic-pointer-iteration-order"); CheckFactories.registerCheck( "bugprone-optional-value-conversion"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt index f0667bbfdd87f7f..b0a2318acc05970 100644 --- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt @@ -45,6 +45,7 @@ add_clang_library(clangTidyBugproneModule STATIC MultipleNewInOneExpressionCheck.cpp MultipleStatementMacroCheck.cpp NoEscapeCheck.cpp + NondeterministicPointerIterationOrderCheck.cpp NonZeroEnumToBoolConversionCheck.cpp NotNullTerminatedResultCheck.cpp OptionalValueConversionCheck.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp new file mode 100644 index 000000000000000..0797376d0aa9ff8 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp @@ -0,0 +1,79 @@ +//===----- NondeterministicPointerIterationOrderCheck.cpp - clang-tidy ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "NondeterministicPointerIterationOrderCheck.h" +#include "clang/AST/ASTContext.h" +#include "clang/Lex/Lexer.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::bugprone { + +void NondeterministicPointerIterationOrderCheck::registerMatchers( + MatchFinder *Finder) { + + auto LoopVariable = varDecl(hasType( + qualType(hasCanonicalType(anyOf(referenceType(), pointerType()))))); + + auto RangeInit = declRefExpr(to(varDecl( + hasType(recordDecl(hasAnyName("std::unordered_set", "std::unordered_map", + "std::unordered_multiset", + "std::unordered_multimap")) + .bind("recorddecl"))))); + + Finder->addMatcher(cxxForRangeStmt(hasLoopVariable(LoopVariable), + hasRangeInit(RangeInit.bind("rangeinit"))) + .bind("cxxForRangeStmt"), + this); + + auto SortFuncM = callee(functionDecl(hasAnyName( + "std::is_sorted", "std::nth_element", "std::sort", "std::partial_sort", + "std::partition", "std::stable_partition", "std::stable_sort"))); + + auto IteratesPointerEltsM = hasArgument( + 0, + cxxMemberCallExpr(on(hasType(cxxRecordDecl(has(fieldDecl(hasType(qualType( + hasCanonicalType(pointsTo(hasCanonicalType(pointerType())))))))))))); + + Finder->addMatcher( + callExpr(allOf(SortFuncM, IteratesPointerEltsM)).bind("sortsemantic"), + this); +} + +void NondeterministicPointerIterationOrderCheck::check( + const MatchFinder::MatchResult &Result) { + const auto *ForRangePointers = + Result.Nodes.getNodeAs("cxxForRangeStmt"); + + if ((ForRangePointers) && !(ForRangePointers->getBeginLoc().isMacroID())) { + const auto *RangeInit = Result.Nodes.getNodeAs("rangeinit"); + if (const auto *ClassTemplate = + Result.Nodes.getNodeAs( + "recorddecl")) { + const TemplateArgumentList &TemplateArgs = + ClassTemplate->getTemplateArgs(); + const llvm::StringRef AlgoName = ClassTemplate->getName(); + const bool IsAlgoArgPointer = + 
TemplateArgs[0].getAsType()->isPointerType(); + + if (IsAlgoArgPointer) { + SourceRange R = RangeInit->getSourceRange(); + diag(R.getBegin(), "iteration of pointers is nondeterministic") << R; + } + } + return; + } + const auto *SortPointers = Result.Nodes.getNodeAs("sortsemantic"); + + if ((SortPointers) && !(SortPointers->getBeginLoc().isMacroID())) { + SourceRange R = SortPointers->getSourceRange(); + diag(R.getBegin(), "sorting pointers is nondeterministic") << R; + } +} + +} // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h new file mode 100644 index 000000000000000..698872fefca9042 --- /dev/null +++ b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h @@ -0,0 +1,39 @@ +//=== NondeterministicPointerIterationOrderCheck.h - clang-tidy -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::bugprone { + +/// Finds nondeterministic usages of pointers in unordered containers. The +/// check also finds calls to sorting-like algorithms on a container of +/// pointers. 
+/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.html +class NondeterministicPointerIterationOrderCheck : public ClangTidyCheck { +public: + NondeterministicPointerIterationOrderCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { + return LangOpts.CPlusPlus; + } + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + std::optional getCheckTraversalKind() const override { + return TK_IgnoreUnlessSpelledInSource; + } +}; + +} // namespace clang::tidy::bugprone + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 4cc4c2146d7e330..7eb2ee511a05f53 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -119,6 +119,12 @@ New checks Warns about code that tries to cast between pointers by means of ``std::bit_cast`` or ``memcpy``. +- New :doc:`bugprone-nondeterministic-pointer-iteration-order + ` + check. + + Finds nondeterministic usages of pointers in unordered containers. + - New :doc:`bugprone-tagged-union-member-count ` check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.rst new file mode 100644 index 000000000000000..41be0bf1c677ec5 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.rst @@ -0,0 +1,44 @@ +.. 
title:: clang-tidy - bugprone-nondeterministic-pointer-iteration-order + +bugprone-nondeterministic-pointer-iteration-order +================================================= + +Finds nondeterministic usages of pointers in unordered containers. + +One canonical example is iteration across a container of pointers. + +.. code-block:: c++ + + { + int a = 1, b = 2; + std::unordered_set<int *> UnorderedPtrSet = {&a, &b}; + for (auto i : UnorderedPtrSet) + f(i); + } + +Another such example is sorting a container of pointers. + +.. code-block:: c++ + + { + int a = 1, b = 2; + std::vector<int *> VectorOfPtr = {&a, &b}; + std::sort(VectorOfPtr.begin(), VectorOfPtr.end()); + } + +Iteration of a container of pointers may present the order of different +pointers differently across different runs of a program. In some cases this +may be acceptable behavior, in others this may be unexpected behavior. This +check is advisory for this reason. + +This check only detects range-based for loops over unordered sets and maps. It +also detects calls to sorting-like algorithms on containers holding pointers. +Other similar usages will not be found and are false negatives. + +Limitations: + +* This check currently does not check if a nondeterministic iteration order is + likely to be a mistake, and instead marks all such iterations as bugprone. +* std::reference_wrapper is not considered yet. +* Only for loops are considered, other iterators can be included in + improvements.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 0082234f5ed31bd..d731b13fc0df446 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -115,6 +115,7 @@ Clang-Tidy Checks :doc:`bugprone-multiple-new-in-one-expression `, :doc:`bugprone-multiple-statement-macro `, :doc:`bugprone-no-escape `, + :doc:`bugprone-nondeterministic-pointer-iteration-order `, :doc:`bugprone-non-zero-enum-to-bool-conversion `, :doc:`bugprone-not-null-terminated-result `, "Yes" :doc:`bugprone-optional-value-conversion `, "Yes" diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_algorithm b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_algorithm new file mode 100644 index 000000000000000..6dbca55a8e365ff --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_algorithm @@ -0,0 +1,31 @@ +#ifndef _SIM_ALGORITHM +#define _SIM_ALGORITHM + +#pragma clang system_header + +namespace std { + +template +bool is_sorted(ForwardIt first, ForwardIt last); + +template +void nth_element(RandomIt first, RandomIt nth, RandomIt last); + +template +void partial_sort(RandomIt first, RandomIt middle, RandomIt last); + +template +void sort (RandomIt first, RandomIt last); + +template +void stable_sort(RandomIt first, RandomIt last); + +template +BidirIt partition(BidirIt first, BidirIt last, UnaryPredicate p); + +template +BidirIt stable_partition(BidirIt first, BidirIt last, UnaryPredicate p); + +} // namespace std + +#endif // _SIM_ALGORITHM diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_c++config.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_c++config.h new file mode 100644 index 000000000000000..ba98e0cc2208bab --- /dev/null +++ 
b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_c++config.h @@ -0,0 +1,11 @@ +#ifndef _SIM_CPP_CONFIG_H +#define _SIM_CPP_CONFIG_H + +#pragma clang system_header + +typedef unsigned char uint8_t; + +typedef __typeof__(sizeof(int)) size_t; +typedef __typeof__((char*)0-(char*)0) ptrdiff_t; + +#endif // _SIM_CPP_CONFIG_H diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_initializer_list b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_initializer_list new file mode 100644 index 000000000000000..e4d9d534b3bd78c --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_initializer_list @@ -0,0 +1,39 @@ +#ifndef _INITIALIZER_LIST +#define _INITIALIZER_LIST + +#pragma clang system_header +# +#include "sim_c++config.h" // size_t + +namespace std { + +template +class initializer_list { + const _E* __begin_; + size_t __size_; + + initializer_list(const _E* __b, size_t __s) + : __begin_(__b), + __size_(__s) + {} + +public: + typedef _E value_type; + typedef const _E& reference; + typedef const _E& const_reference; + typedef size_t size_type; + + typedef const _E* iterator; + typedef const _E* const_iterator; + + initializer_list() : __begin_(0), __size_(0) {} + + size_t size() const {return __size_;} + const _E* begin() const {return __begin_;} + const _E* end() const {return __begin_ + __size_;} + +}; // class initializer_list + +} // namespace std + +#endif // _INITIALIZER_LIST diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_iterator_base b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_iterator_base new file mode 100644 index 000000000000000..3b205d1722c9ddc --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_iterator_base @@ -0,0 +1,22 @@ +#ifndef 
_SIM_ITERATOR_BASE +#define _SIM_ITERATOR_BASE + +namespace std { + +struct input_iterator_tag { }; +struct output_iterator_tag { }; +struct forward_iterator_tag : public input_iterator_tag { }; +struct bidirectional_iterator_tag : public forward_iterator_tag { }; +struct random_access_iterator_tag : public bidirectional_iterator_tag { }; + +template struct iterator_traits { + typedef typename Iterator::difference_type difference_type; + typedef typename Iterator::value_type value_type; + typedef typename Iterator::pointer pointer; + typedef typename Iterator::reference reference; + typedef typename Iterator::iterator_category iterator_category; +}; + +} // namespace std + +#endif // _SIM_ITERATOR_BASE diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_map b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_map new file mode 100644 index 000000000000000..8c57f5c71f8814a --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_map @@ -0,0 +1,34 @@ + +#ifndef _SIM_MAP +#define _SIM_MAP + +#pragma clang system_header +#include "sim_stl_pair" + +namespace std { + +template +class map { + public: + using value_type = pair; + map(); + map(initializer_list> initList); + value_type& operator[](const Key& key); + value_type& operator[](Key&& key); + class iterator { + public: + iterator(Key *key): ptr(key) {} + iterator& operator++() { ++ptr; return *this; } + bool operator!=(const iterator &other) const { return ptr != other.ptr; } + const Key &operator*() const { return *ptr; } + private: + Key *ptr; + }; + Key *val; + iterator begin() const { return iterator(val); } + iterator end() const { return iterator(val + 1); } +}; + +} // namespace std + +#endif // _SIM_MAP diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_set 
b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_set new file mode 100644 index 000000000000000..f2f70095538925b --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_set @@ -0,0 +1,44 @@ + +#ifndef _SIM_SET +#define _SIM_SET + +#pragma clang system_header +#include "sim_initializer_list" + +namespace std { + +template< class T = void > +struct less; + +template< class T > +struct allocator; + +template< class Key > +struct hash; + +template< + class Key, + class Compare = std::less, + class Alloc = std::allocator +> class set { + public: + set(initializer_list __list) {} + + class iterator { + public: + iterator(Key *key): ptr(key) {} + iterator& operator++() { ++ptr; return *this; } + bool operator!=(const iterator &other) const { return ptr != other.ptr; } + const Key &operator*() const { return *ptr; } + private: + Key *ptr; + }; + + Key *val; + iterator begin() const { return iterator(val); } + iterator end() const { return iterator(val + 1); } +}; + +} // namespace std + +#endif // _SIM_SET diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_stl_pair b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_stl_pair new file mode 100644 index 000000000000000..d244bb363b861a0 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_stl_pair @@ -0,0 +1,32 @@ +#ifndef _SIM_STL_PAIR +#define _SIM_STL_PAIR + +#pragma clang system_header + +#include "sim_type_traits" + +namespace std { + +template +struct pair { + T1 first; + T2 second; + + pair() : first(), second() {} + pair(const T1 &a, const T2 &b) : first(a), second(b) {} + + template + pair(const pair &other) : first(other.first), + second(other.second) {} +}; + +template +pair::type, typename remove_reference::type> +make_pair(T1 &&, T2 &&) { + return {}; +}; + +} // namespace std + +#endif 
// _SIM_STL_PAIR + diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_type_traits b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_type_traits new file mode 100644 index 000000000000000..f066767c4d98589 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_type_traits @@ -0,0 +1,19 @@ + +#ifndef _SIM_TYPE_TRAITS +#define _SIM_TYPE_TRAITS + +#pragma clang system_header +namespace std { + +template< class T > struct remove_reference {typedef T type;}; +template< class T > struct remove_reference {typedef T type;}; +template< class T > struct remove_reference {typedef T type;}; + +template typename remove_reference::type&& move(T&& a); + +template< class T > +using remove_reference_t = typename remove_reference::type; + +} // namespace std + +#endif // _SIM_TYPE_TRAITS diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_map b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_map new file mode 100644 index 000000000000000..fabd8e7fd2d7486 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_map @@ -0,0 +1,33 @@ +#ifndef _SIM_UNORDERED_MAP +#define _SIM_UNORDERED_MAP + +#pragma clang system_header +#include "sim_initializer_list" + +namespace std { + +template +class unordered_map { +public: + using value_type = pair; + unordered_map(); + unordered_map(initializer_list> initList); + value_type& operator[](const Key& key); + value_type& operator[](Key&& key); + class iterator { + public: + iterator(Key *key): ptr(key) {} + iterator& operator++() { ++ptr; return *this; } + bool operator!=(const iterator &other) const { return ptr != other.ptr; } + const Key &operator*() const { return *ptr; } + private: + Key *ptr; + }; + Key *val; + iterator begin() const { return 
iterator(val); } + iterator end() const { return iterator(val + 1); } +}; + +} // namespace std + +#endif // _SIM_UNORDERED_MAP diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_set b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_set new file mode 100644 index 000000000000000..a077507bbdcbcb1 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_set @@ -0,0 +1,35 @@ +#ifndef _SIM_UNORDERED_SET +#define _SIM_UNORDERED_SET + +#pragma clang system_header +#include "sim_initializer_list" + +namespace std { + +template< + class Key, + class Hash = std::hash, + class Compare = std::less, + class Alloc = std::allocator +> class unordered_set { + public: + unordered_set(initializer_list __list) {} + + class iterator { + public: + iterator(Key *key): ptr(key) {} + iterator& operator++() { ++ptr; return *this; } + bool operator!=(const iterator &other) const { return ptr != other.ptr; } + const Key &operator*() const { return *ptr; } + private: + Key *ptr; + }; + + Key *val; + iterator begin() const { return iterator(val); } + iterator end() const { return iterator(val + 1); } +}; + +} // namespace std + +#endif // _SIM_UNORDERED_SET diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_vector b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_vector new file mode 100644 index 000000000000000..dfa9abfb8863ecc --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_vector @@ -0,0 +1,150 @@ +#ifndef _SIM_VECTOR +#define _SIM_VECTOR + +#pragma clang system_header + +#include "sim_iterator_base" + +namespace std { + +template struct __vector_iterator { + typedef __vector_iterator iterator; + typedef __vector_iterator const_iterator; + + typedef ptrdiff_t difference_type; + 
typedef T value_type; + typedef Ptr pointer; + typedef Ref reference; + typedef std::random_access_iterator_tag iterator_category; + + __vector_iterator(const Ptr p = 0) : ptr(p) {} + __vector_iterator(const iterator &rhs): ptr(rhs.base()) {} + __vector_iterator& operator++() { ++ ptr; return *this; } + __vector_iterator operator++(int) { + auto tmp = *this; + ++ ptr; + return tmp; + } + __vector_iterator operator--() { -- ptr; return *this; } + __vector_iterator operator--(int) { + auto tmp = *this; -- ptr; + return tmp; + } + __vector_iterator operator+(difference_type n) { + return ptr + n; + } + friend __vector_iterator operator+( + difference_type n, + const __vector_iterator &iter) { + return n + iter.ptr; + } + __vector_iterator operator-(difference_type n) { + return ptr - n; + } + __vector_iterator operator+=(difference_type n) { + return ptr += n; + } + __vector_iterator operator-=(difference_type n) { + return ptr -= n; + } + + template + difference_type operator-(const __vector_iterator &rhs); + + Ref operator*() const { return *ptr; } + Ptr operator->() const { return ptr; } + + Ref operator[](difference_type n) { + return *(ptr+n); + } + + bool operator==(const iterator &rhs) const { return ptr == rhs.ptr; } + bool operator==(const const_iterator &rhs) const { return ptr == rhs.ptr; } + + bool operator!=(const iterator &rhs) const { return ptr != rhs.ptr; } + bool operator!=(const const_iterator &rhs) const { return ptr != rhs.ptr; } + + const Ptr& base() const { return ptr; } + +private: + Ptr ptr; +}; + +template +class vector { + T *_start; + T *_finish; + T *_end_of_storage; + +public: + typedef T value_type; + typedef size_t size_type; + typedef __vector_iterator iterator; + typedef __vector_iterator const_iterator; + + vector() : _start(0), _finish(0), _end_of_storage(0) {} + template + vector(InputIterator first, InputIterator last); + vector(const vector &other); + vector(vector &&other); + ~vector(); + + size_t size() const { + return 
size_t(_finish - _start); + } + + vector& operator=(const vector &other); + vector& operator=(vector &&other); + vector& operator=(std::initializer_list ilist); + + void assign(size_type count, const T &value); + template + void assign(InputIterator first, InputIterator last); + void assign(std::initializer_list ilist); + + void clear(); + + void push_back(const T &value); + void push_back(T &&value); + template + void emplace_back(Args&&... args); + void pop_back(); + + iterator insert(const_iterator position, const value_type &val); + iterator insert(const_iterator position, size_type n, + const value_type &val); + template + iterator insert(const_iterator position, InputIterator first, + InputIterator last); + iterator insert(const_iterator position, value_type &&val); + iterator insert(const_iterator position, initializer_list il); + + template + iterator emplace(const_iterator position, Args&&... args); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + + T &operator[](size_t n) { + return _start[n]; + } + + const T &operator[](size_t n) const { + return _start[n]; + } + + iterator begin() { return iterator(_start); } + const_iterator begin() const { return const_iterator(_start); } + const_iterator cbegin() const { return const_iterator(_start); } + iterator end() { return iterator(_finish); } + const_iterator end() const { return const_iterator(_finish); } + const_iterator cend() const { return const_iterator(_finish); } + T& front() { return *begin(); } + const T& front() const { return *begin(); } + T& back() { return *(end() - 1); } + const T& back() const { return *(end() - 1); } +}; + +} // namespace std + +#endif // _SIM_VECTOR diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/nondeterministic-pointer-iteration-order.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/nondeterministic-pointer-iteration-order.cpp new file mode 100644 index 000000000000000..91853874d0afc91 --- 
/dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/nondeterministic-pointer-iteration-order.cpp @@ -0,0 +1,84 @@ +// RUN: %check_clang_tidy %s bugprone-nondeterministic-pointer-iteration-order %t -- -- -I%S -std=c++14 + +#include "Inputs/system-header-simulator/sim_set" +#include "Inputs/system-header-simulator/sim_unordered_set" +#include "Inputs/system-header-simulator/sim_map" +#include "Inputs/system-header-simulator/sim_unordered_map" +#include "Inputs/system-header-simulator/sim_vector" +#include "Inputs/system-header-simulator/sim_algorithm" + +template +void f(T x); + +void PointerIteration() { + int a = 1, b = 2; + std::set OrderedIntSet = {a, b}; + std::set OrderedPtrSet = {&a, &b}; + std::unordered_set UnorderedIntSet = {a, b}; + std::unordered_set UnorderedPtrSet = {&a, &b}; + std::map IntMap = { std::make_pair(a,a), std::make_pair(b,b) }; + std::map PtrMap = { std::make_pair(&a,&a), std::make_pair(&b,&b) }; + std::unordered_map IntUnorderedMap = { std::make_pair(a,a), std::make_pair(b,b) }; + std::unordered_map PtrUnorderedMap = { std::make_pair(&a,&a), std::make_pair(&b,&b) }; + + for (auto i : OrderedIntSet) // no-warning + f(i); + + for (auto i : OrderedPtrSet) // no-warning + f(i); + + for (auto i : UnorderedIntSet) // no-warning + f(i); + + for (auto i : UnorderedPtrSet) + f(i); + // CHECK-MESSAGES: :[[@LINE-2]]:17: warning: iteration of pointers is nondeterministic + + for (auto &i : UnorderedPtrSet) + f(i); + // CHECK-MESSAGES: :[[@LINE-2]]:18: warning: iteration of pointers is nondeterministic + + for (auto &i : IntMap) // no-warning + f(i); + + for (auto &i : PtrMap) // no-warning + f(i); + + for (auto &i : IntUnorderedMap) // no-warning + f(i); + + for (auto &i : PtrUnorderedMap) + f(i); + // CHECK-MESSAGES: :[[@LINE-2]]:18: warning: iteration of pointers is nondeterministic +} + +bool g (int *x) { return true; } +bool h (int x) { return true; } + +void PointerSorting() { + int a = 1, b = 2, c = 3; + std::vector V1 = {a, b}; 
+ std::vector V2 = {&a, &b}; + + std::is_sorted(V1.begin(), V1.end()); // no-warning + std::nth_element(V1.begin(), V1.begin() + 1, V1.end()); // no-warning + std::partial_sort(V1.begin(), V1.begin() + 1, V1.end()); // no-warning + std::sort(V1.begin(), V1.end()); // no-warning + std::stable_sort(V1.begin(), V1.end()); // no-warning + std::partition(V1.begin(), V1.end(), h); // no-warning + std::stable_partition(V1.begin(), V1.end(), h); // no-warning + std::is_sorted(V2.begin(), V2.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic + std::nth_element(V2.begin(), V2.begin() + 1, V2.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic + std::partial_sort(V2.begin(), V2.begin() + 1, V2.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic + std::sort(V2.begin(), V2.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic + std::stable_sort(V2.begin(), V2.end()); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic + std::partition(V2.begin(), V2.end(), g); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic + std::stable_partition(V2.begin(), V2.end(), g); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic +} diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 31ee4f7e516feda..920a2369f964350 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -786,6 +786,12 @@ Moved checkers To detect too large arguments passed to malloc, consider using the checker ``alpha.taint.TaintedAlloc``. +- The checkers ``alpha.nondeterminism.PointerSorting`` and + ``alpha.nondeterminism.PointerIteration`` were moved to a new bugprone + checker named ``bugprone-nondeterministic-pointer-iteration-order``. 
The + original checkers were implemented only using AST matching and make more + sense as a single clang-tidy check. + .. _release-notes-sanitizers: Sanitizers diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index 58dbd686a6dc9fb..87b03438e6e0b97 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -3447,37 +3447,6 @@ Limitations: More details at the corresponding `GitHub issue `_. -.. _alpha-nondeterminism-PointerIteration: - -alpha.nondeterminism.PointerIteration (C++) -""""""""""""""""""""""""""""""""""""""""""" -Check for non-determinism caused by iterating unordered containers of pointers. - -.. code-block:: c - - void test() { - int a = 1, b = 2; - std::unordered_set UnorderedPtrSet = {&a, &b}; - - for (auto i : UnorderedPtrSet) // warn - f(i); - } - -.. _alpha-nondeterminism-PointerSorting: - -alpha.nondeterminism.PointerSorting (C++) -""""""""""""""""""""""""""""""""""""""""" -Check for non-determinism caused by sorting of pointers. - -.. code-block:: c - - void test() { - int a = 1, b = 2; - std::vector V = {&a, &b}; - std::sort(V.begin(), V.end()); // warn - } - - alpha.WebKit ^^^^^^^^^^^^ diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index 349040c15eeb83e..9a6b35c1b9f774e 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -118,8 +118,6 @@ def Debug : Package<"debug">, Hidden; def CloneDetectionAlpha : Package<"clone">, ParentPackage; -def NonDeterminismAlpha : Package<"nondeterminism">, ParentPackage; - def Fuchsia : Package<"fuchsia">; def FuchsiaAlpha : Package<"fuchsia">, ParentPackage; @@ -1711,22 +1709,6 @@ def TaintedDivChecker: Checker<"TaintedDiv">, } // end "optin.taint" -//===----------------------------------------------------------------------===// -// NonDeterminism checkers. 
-//===----------------------------------------------------------------------===// - -let ParentPackage = NonDeterminismAlpha in { - -def PointerIterationChecker : Checker<"PointerIteration">, - HelpText<"Checks for non-determinism caused by iteration of unordered containers of pointers">, - Documentation; - -def PointerSortingChecker : Checker<"PointerSorting">, - HelpText<"Check for non-determinism caused by sorting of pointers">, - Documentation; - -} // end alpha.nondeterminism - //===----------------------------------------------------------------------===// // Fuchsia checkers. //===----------------------------------------------------------------------===// diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt index 6da3665ab9a4dfc..62aa5ff7f002a97 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt +++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt @@ -91,8 +91,6 @@ add_clang_library(clangStaticAnalyzerCheckers OSObjectCStyleCast.cpp PaddingChecker.cpp PointerArithChecker.cpp - PointerIterationChecker.cpp - PointerSortingChecker.cpp PointerSubChecker.cpp PthreadLockChecker.cpp PutenvStackArrayChecker.cpp diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp deleted file mode 100644 index 895b2160b76a7b0..000000000000000 --- a/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp +++ /dev/null @@ -1,101 +0,0 @@ -//== PointerIterationChecker.cpp ------------------------------- -*- C++ -*--=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines PointerIterationChecker which checks for non-determinism -// caused due to iteration of unordered containers of pointer elements. -// -//===----------------------------------------------------------------------===// - -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" -#include "clang/StaticAnalyzer/Core/Checker.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" - -using namespace clang; -using namespace ento; -using namespace ast_matchers; - -namespace { - -// ID of a node at which the diagnostic would be emitted. -constexpr llvm::StringLiteral WarnAtNode = "iter"; - -class PointerIterationChecker : public Checker { -public: - void checkASTCodeBody(const Decl *D, - AnalysisManager &AM, - BugReporter &BR) const; -}; - -static void emitDiagnostics(const BoundNodes &Match, const Decl *D, - BugReporter &BR, AnalysisManager &AM, - const PointerIterationChecker *Checker) { - auto *ADC = AM.getAnalysisDeclContext(D); - - const auto *MarkedStmt = Match.getNodeAs(WarnAtNode); - assert(MarkedStmt); - - auto Range = MarkedStmt->getSourceRange(); - auto Location = PathDiagnosticLocation::createBegin(MarkedStmt, - BR.getSourceManager(), - ADC); - std::string Diagnostics; - llvm::raw_string_ostream OS(Diagnostics); - OS << "Iteration of pointer-like elements " - << "can result in non-deterministic ordering"; - - BR.EmitBasicReport(ADC->getDecl(), Checker, - "Iteration of pointer-like elements", "Non-determinism", - Diagnostics, Location, Range); -} - -// Assumption: Iteration of ordered containers of pointers is deterministic. - -// TODO: Currently, we only check for std::unordered_set. Other unordered -// containers like std::unordered_map also need to be handled. 
- -// TODO: Currently, we do not check what the for loop does with the iterated -// pointer values. Not all iterations may cause non-determinism. For example, -// counting or summing up the elements should not be non-deterministic. - -auto matchUnorderedIterWithPointers() -> decltype(decl()) { - - auto UnorderedContainerM = declRefExpr(to(varDecl(hasType( - recordDecl(hasName("std::unordered_set") - ))))); - - auto PointerTypeM = varDecl(hasType(hasCanonicalType(pointerType()))); - - auto PointerIterM = stmt(cxxForRangeStmt( - hasLoopVariable(PointerTypeM), - hasRangeInit(UnorderedContainerM) - )).bind(WarnAtNode); - - return decl(forEachDescendant(PointerIterM)); -} - -void PointerIterationChecker::checkASTCodeBody(const Decl *D, - AnalysisManager &AM, - BugReporter &BR) const { - auto MatcherM = matchUnorderedIterWithPointers(); - - auto Matches = match(MatcherM, *D, AM.getASTContext()); - for (const auto &Match : Matches) - emitDiagnostics(Match, D, BR, AM, this); -} - -} // end of anonymous namespace - -void ento::registerPointerIterationChecker(CheckerManager &Mgr) { - Mgr.registerChecker(); -} - -bool ento::shouldRegisterPointerIterationChecker(const CheckerManager &mgr) { - const LangOptions &LO = mgr.getLangOpts(); - return LO.CPlusPlus; -} diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp deleted file mode 100644 index 25d87f4acfc910c..000000000000000 --- a/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp +++ /dev/null @@ -1,115 +0,0 @@ -//== PointerSortingChecker.cpp --------------------------------- -*- C++ -*--=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines PointerSortingChecker which checks for non-determinism -// caused due to sorting containers with pointer-like elements. -// -//===----------------------------------------------------------------------===// - -#include "clang/ASTMatchers/ASTMatchFinder.h" -#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" -#include "clang/StaticAnalyzer/Core/Checker.h" -#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" - -using namespace clang; -using namespace ento; -using namespace ast_matchers; - -namespace { - -// ID of a node at which the diagnostic would be emitted. -constexpr llvm::StringLiteral WarnAtNode = "sort"; - -class PointerSortingChecker : public Checker { -public: - void checkASTCodeBody(const Decl *D, - AnalysisManager &AM, - BugReporter &BR) const; -}; - -static void emitDiagnostics(const BoundNodes &Match, const Decl *D, - BugReporter &BR, AnalysisManager &AM, - const PointerSortingChecker *Checker) { - auto *ADC = AM.getAnalysisDeclContext(D); - - const auto *MarkedStmt = Match.getNodeAs(WarnAtNode); - assert(MarkedStmt); - - auto Range = MarkedStmt->getSourceRange(); - auto Location = PathDiagnosticLocation::createBegin(MarkedStmt, - BR.getSourceManager(), - ADC); - std::string Diagnostics; - llvm::raw_string_ostream OS(Diagnostics); - OS << "Sorting pointer-like elements " - << "can result in non-deterministic ordering"; - - BR.EmitBasicReport(ADC->getDecl(), Checker, - "Sorting of pointer-like elements", "Non-determinism", - OS.str(), Location, Range); -} - -decltype(auto) callsName(const char *FunctionName) { - return callee(functionDecl(hasName(FunctionName))); -} - -// FIXME: Currently we simply check if std::sort is used with pointer-like -// elements. This approach can have a big false positive rate. 
Using std::sort, -// std::unique and then erase is common technique for deduplicating a container -// (which in some cases might even be quicker than using, let's say std::set). -// In case a container contains arbitrary memory addresses (e.g. multiple -// things give different stuff but might give the same thing multiple times) -// which we don't want to do things with more than once, we might use -// sort-unique-erase and the sort call will emit a report. -auto matchSortWithPointers() -> decltype(decl()) { - // Match any of these function calls. - auto SortFuncM = anyOf( - callsName("std::is_sorted"), - callsName("std::nth_element"), - callsName("std::partial_sort"), - callsName("std::partition"), - callsName("std::sort"), - callsName("std::stable_partition"), - callsName("std::stable_sort") - ); - - // Match only if the container has pointer-type elements. - auto IteratesPointerEltsM = hasArgument(0, - hasType(cxxRecordDecl(has( - fieldDecl(hasType(hasCanonicalType( - pointsTo(hasCanonicalType(pointerType())) - ))) - )))); - - auto PointerSortM = traverse( - TK_AsIs, - stmt(callExpr(allOf(SortFuncM, IteratesPointerEltsM))).bind(WarnAtNode)); - - return decl(forEachDescendant(PointerSortM)); -} - -void PointerSortingChecker::checkASTCodeBody(const Decl *D, - AnalysisManager &AM, - BugReporter &BR) const { - auto MatcherM = matchSortWithPointers(); - - auto Matches = match(MatcherM, *D, AM.getASTContext()); - for (const auto &Match : Matches) - emitDiagnostics(Match, D, BR, AM, this); -} - -} // end of anonymous namespace - -void ento::registerPointerSortingChecker(CheckerManager &Mgr) { - Mgr.registerChecker(); -} - -bool ento::shouldRegisterPointerSortingChecker(const CheckerManager &mgr) { - const LangOptions &LO = mgr.getLangOpts(); - return LO.CPlusPlus; -} diff --git a/clang/test/Analysis/ptr-iter.cpp b/clang/test/Analysis/ptr-iter.cpp deleted file mode 100644 index a94288cd1c8cccb..000000000000000 --- a/clang/test/Analysis/ptr-iter.cpp +++ /dev/null @@ 
-1,28 +0,0 @@ -// RUN: %clang_analyze_cc1 %s -std=c++14 -analyzer-output=text -verify \ -// RUN: -analyzer-checker=core,alpha.nondeterminism.PointerIteration - -#include "Inputs/system-header-simulator-cxx.h" - -template -void f(T x); - -void PointerIteration() { - int a = 1, b = 2; - std::set OrderedIntSet = {a, b}; - std::set OrderedPtrSet = {&a, &b}; - std::unordered_set UnorderedIntSet = {a, b}; - std::unordered_set UnorderedPtrSet = {&a, &b}; - - for (auto i : OrderedIntSet) // no-warning - f(i); - - for (auto i : OrderedPtrSet) // no-warning - f(i); - - for (auto i : UnorderedIntSet) // no-warning - f(i); - - for (auto i : UnorderedPtrSet) // expected-warning {{Iteration of pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerIteration] -// expected-note@-1 {{Iteration of pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerIteration] - f(i); -} diff --git a/clang/test/Analysis/ptr-sort.cpp b/clang/test/Analysis/ptr-sort.cpp deleted file mode 100644 index d238b390bdc2357..000000000000000 --- a/clang/test/Analysis/ptr-sort.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: %clang_analyze_cc1 %s -std=c++14 -analyzer-output=text -verify \ -// RUN: -analyzer-checker=core,alpha.nondeterminism.PointerSorting - -#include "Inputs/system-header-simulator-cxx.h" - -bool f(int x) { return true; } -bool g(int *x) { return true; } - -void PointerSorting() { - int a = 1, b = 2; - std::vector V1 = {a, b}; - std::vector V2 = {&a, &b}; - - std::is_sorted(V1.begin(), V1.end()); // no-warning - std::nth_element(V1.begin(), V1.begin() + 1, V1.end()); // no-warning - std::partial_sort(V1.begin(), V1.begin() + 1, V1.end()); // no-warning - std::sort(V1.begin(), V1.end()); // no-warning - std::stable_sort(V1.begin(), V1.end()); // no-warning - std::partition(V1.begin(), V1.end(), f); // no-warning - std::stable_partition(V1.begin(), V1.end(), g); // no-warning - - std::is_sorted(V2.begin(), V2.end()); // 
expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - std::nth_element(V2.begin(), V2.begin() + 1, V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - std::partial_sort(V2.begin(), V2.begin() + 1, V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - std::sort(V2.begin(), V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - std::stable_sort(V2.begin(), V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - std::partition(V2.begin(), V2.end(), f); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] - std::stable_partition(V2.begin(), V2.end(), g); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} 
[alpha.nondeterminism.PointerSorting] - // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting] -} From f6b513a7857cbcdb7df93079916e6bb8a00bd0b8 Mon Sep 17 00:00:00 2001 From: Vassil Vassilev Date: Mon, 28 Oct 2024 08:57:57 +0000 Subject: [PATCH 143/425] Revert "Add explicit symbol visibility macros to InstrProfData.inc (#110732)" This reverts commit d7ca703eab7997814de425eaa4fd888563d78831 in llvm/llvm-project#110732 --- compiler-rt/include/profile/InstrProfData.inc | 13 ++----------- llvm/include/llvm/ProfileData/InstrProfData.inc | 13 ++----------- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc index 08ecaf0ed9fa5b1..c66b0465a0b5485 100644 --- a/compiler-rt/include/profile/InstrProfData.inc +++ b/compiler-rt/include/profile/InstrProfData.inc @@ -62,15 +62,6 @@ #define INSTR_PROF_VISIBILITY #endif -/* This include is needed for symbol visibility macros used on - * ValueProfRecord\ValueProfData so there functions are exported from the - * LLVM shared library on windows. */ -#ifdef __cplusplus -#include "llvm/Support/Compiler.h" -#else -#define LLVM_ABI -#endif - // clang-format off:consider re-enabling clang-format if auto-formatted C macros // are readable (e.g., after `issue #82426` is fixed) /* INSTR_PROF_DATA start. */ @@ -382,7 +373,7 @@ INSTR_PROF_SECT_ENTRY(IPSK_covinit, \ * This is the header of the data structure that defines the on-disk * layout of the value profile data of a particular kind for one function. */ -typedef struct LLVM_ABI ValueProfRecord { +typedef struct ValueProfRecord { /* The kind of the value profile record. */ uint32_t Kind; /* @@ -432,7 +423,7 @@ typedef struct LLVM_ABI ValueProfRecord { * Per-function header/control data structure for value profiling * data in indexed format. 
*/ -typedef struct LLVM_ABI ValueProfData { +typedef struct ValueProfData { /* * Total size in bytes including this field. It must be a multiple * of sizeof(uint64_t). diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index 08ecaf0ed9fa5b1..c66b0465a0b5485 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -62,15 +62,6 @@ #define INSTR_PROF_VISIBILITY #endif -/* This include is needed for symbol visibility macros used on - * ValueProfRecord\ValueProfData so there functions are exported from the - * LLVM shared library on windows. */ -#ifdef __cplusplus -#include "llvm/Support/Compiler.h" -#else -#define LLVM_ABI -#endif - // clang-format off:consider re-enabling clang-format if auto-formatted C macros // are readable (e.g., after `issue #82426` is fixed) /* INSTR_PROF_DATA start. */ @@ -382,7 +373,7 @@ INSTR_PROF_SECT_ENTRY(IPSK_covinit, \ * This is the header of the data structure that defines the on-disk * layout of the value profile data of a particular kind for one function. */ -typedef struct LLVM_ABI ValueProfRecord { +typedef struct ValueProfRecord { /* The kind of the value profile record. */ uint32_t Kind; /* @@ -432,7 +423,7 @@ typedef struct LLVM_ABI ValueProfRecord { * Per-function header/control data structure for value profiling * data in indexed format. */ -typedef struct LLVM_ABI ValueProfData { +typedef struct ValueProfData { /* * Total size in bytes including this field. It must be a multiple * of sizeof(uint64_t). From dff114b3565e4c981fcb40f24f72a0cb426294fe Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Mon, 28 Oct 2024 10:01:06 +0100 Subject: [PATCH 144/425] [ARM] Optimise non-ABI frame pointers (#110286) With -fomit-frame-pointer, even if we set up a frame pointer for other reasons (e.g. variable-sized or over-aligned stack allocations), we don't need to create an ABI-compliant frame record. 
This means that we can save all of the general-purpose registers in one push, instead of splitting it to ensure that the frame pointer and link register are adjacent on the stack, saving two instructions per function. --- llvm/lib/Target/ARM/ARMFrameLowering.cpp | 11 + llvm/lib/Target/ARM/ARMSubtarget.cpp | 30 +- llvm/lib/Target/ARM/ARMSubtarget.h | 4 + llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll | 47 +- llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll | 160 +++-- .../CodeGen/Thumb2/pacbti-m-frame-chain.ll | 617 +++++++++++++++--- .../Thumb2/pacbti-m-indirect-tail-call.ll | 36 +- .../CodeGen/Thumb2/pacbti-m-outliner-3.ll | 126 ++-- .../CodeGen/Thumb2/pacbti-m-outliner-4.ll | 198 +++--- .../test/CodeGen/Thumb2/pacbti-m-overalign.ll | 64 +- .../test/CodeGen/Thumb2/pacbti-m-stack-arg.ll | 9 +- .../test/CodeGen/Thumb2/pacbti-m-varargs-1.ll | 73 ++- .../test/CodeGen/Thumb2/pacbti-m-varargs-2.ll | 85 ++- llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll | 110 +++- 14 files changed, 1089 insertions(+), 481 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 4f366dcffcd29a7..a1d131103239bda 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -3003,6 +3003,17 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots( // on the stack. 
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12)); break; + case ARMSubtarget::NoSplit: + assert(!MF.getTarget().Options.DisableFramePointerElim(MF) && + "ABI-required frame pointers need a CSR split when signing return " + "address."); + CSI.insert(find_if(CSI, + [=](const auto &CS) { + Register Reg = CS.getReg(); + return Reg != ARM::LR; + }), + CalleeSavedInfo(ARM::R12)); + break; default: llvm_unreachable("Unexpected CSR split with return address signing"); } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 9adfb1fab5f0847..cec44acc5443770 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -492,17 +492,16 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const { const std::vector CSI = MF.getFrameInfo().getCalleeSavedInfo(); - // Returns SplitR7 if the frame setup must be split into two separate pushes - // of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is - // always required on Thumb1-only targets, as the push and pop instructions - // can't access the high registers. This is also required when R7 is the frame - // pointer and frame pointer elimiination is disabled, or branch signing is - // enabled and AAPCS is disabled. - if ((MF.getInfo()->shouldSignReturnAddress() && - !createAAPCSFrameChain()) || - (getFramePointerReg() == ARM::R7 && - MF.getTarget().Options.DisableFramePointerElim(MF)) || - isThumb1Only()) + // Thumb1 always splits the pushes at R7, because the Thumb1 push instruction + // cannot use high registers except for lr. + if (isThumb1Only()) + return SplitR7; + + // If R7 is the frame pointer, we must split at R7 to ensure that the + // previous frame pointer (R7) and return address (LR) are adjacent on the + // stack, to form a valid frame record. 
+ if (getFramePointerReg() == ARM::R7 && + MF.getTarget().Options.FramePointerIsReserved(MF)) return SplitR7; // Returns SplitR11WindowsSEH when the stack pointer needs to be @@ -515,11 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const { (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF))) return SplitR11WindowsSEH; - // Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each - // other in the list of callee saved registers in a frame, and branch - // signing is enabled. + // Returns SplitR11AAPCSSignRA when the frame pointer is R11, requiring R11 + // and LR to be adjacent on the stack, and branch signing is enabled, + // requiring R12 to be on the stack. if (MF.getInfo()->shouldSignReturnAddress() && - getFramePointerReg() == ARM::R11) + getFramePointerReg() == ARM::R11 && + MF.getTarget().Options.FramePointerIsReserved(MF)) return SplitR11AAPCSSignRA; return NoSplit; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 214c5f1b45e556c..2f7af05a259f8f9 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -95,6 +95,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// push {r0-r7, lr} /// push {r8-r12} /// vpush {d8-d15} + /// Note that Thumb1 changes this layout when the frame pointer is R11, + /// using a longer sequence of instructions because R11 can't be used by a + /// Thumb1 push instruction. This doesn't currently have a separate enum + /// value, and is handled entirely within Thumb1FrameLowering::emitPrologue.
SplitR7, /// When the stack frame size is not known (because of variable-sized diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll index c309d992b95a5e4..a0e6f9bf9b30d9b 100644 --- a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll +++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll @@ -1,9 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s ; RUN: llc --filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s --check-prefix=UNWIND target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-unknown-eabi" +; Check the function starts with `pacbti` and correct unwind info is emitted define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "branch-target-enforcement" { +; CHECK-LABEL: _Z1fi: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pacbti r12, lr, sp +; CHECK-NEXT: .save {r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r7, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r7, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: bl _Z1gi +; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r7, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %add = add nsw i32 %x, 1 %call = tail call i32 @_Z1gi(i32 %add) @@ -13,26 +36,10 @@ entry: declare dso_local i32 @_Z1gi(i32) -; Check the function starts with `pacbti` and correct unwind info is emitted -; CHECK-LABEL: _Z1fi: -; ... 
-; CHECK: pacbti r12, lr, sp -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .cfi_offset ra_auth_code, -12 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; ... - ; UNWIND-LABEL: Opcodes [ ; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0x80 0x08 ; pop {r7} ; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} -; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} + + diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll index 0ae46cb8879ee0e..31f8ecddcb986c5 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -16,6 +17,27 @@ target triple = "thumbv8m.main-none-none-eabi" ; } define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf" { +; CHECK-LABEL: f0: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r7, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r7, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: bl g +; CHECK-NEXT: adds r0, #1 +; 
CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r7, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %sub = add nsw i32 %x, -1 %call = tail call i32 @g(i32 %sub) @@ -23,27 +45,28 @@ entry: ret i32 %add } -; CHECK-LABEL: f0: -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .cfi_offset ra_auth_code, -12 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; ... -; CHECK: add sp, #4 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r7, lr} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr - define hidden i32 @f1(i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: f1: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: vstr fpcxtns, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: .save {r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r7, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -8 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .cfi_offset r7, -16 +; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: bl g +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: pop.w {r7, r12, lr} +; CHECK-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr} +; CHECK-NEXT: vldr fpcxtns, [sp], #4 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: clrm {r1, r2, r3, r12, apsr} +; CHECK-NEXT: bxns lr entry: %sub = add nsw i32 %x, -1 %call = tail call i32 @g(i32 %sub) @@ -51,44 +74,55 @@ entry: ret i32 %add } -; CHECK-LABEL: f1: -; CHECK: pac r12, lr, sp -; CHECK-NEXT: vstr fpcxtns, [sp, #-4]! 
-; CHECK-NEXT: .cfi_def_cfa_offset 4 -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK: vldr fpcxtns, [sp], #4 -; CHECK: aut r12, lr, sp - define hidden i32 @f2(i32 %x) local_unnamed_addr #1 { +; CHECK-LABEL: f2: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r7, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r7, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subs r0, #1 +; CHECK-NEXT: bl g +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r7, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: mrs r12, control +; CHECK-NEXT: tst.w r12, #8 +; CHECK-NEXT: beq .LBB2_2 +; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: vmrs r12, fpscr +; CHECK-NEXT: vmov d0, lr, lr +; CHECK-NEXT: vmov d1, lr, lr +; CHECK-NEXT: vmov d2, lr, lr +; CHECK-NEXT: vmov d3, lr, lr +; CHECK-NEXT: vmov d4, lr, lr +; CHECK-NEXT: vmov d5, lr, lr +; CHECK-NEXT: vmov d6, lr, lr +; CHECK-NEXT: vmov d7, lr, lr +; CHECK-NEXT: bic r12, r12, #159 +; CHECK-NEXT: bic r12, r12, #4026531840 +; CHECK-NEXT: vmsr fpscr, r12 +; CHECK-NEXT: .LBB2_2: @ %entry +; CHECK-NEXT: mov r1, lr +; CHECK-NEXT: mov r2, lr +; CHECK-NEXT: mov r3, lr +; CHECK-NEXT: mov r12, lr +; CHECK-NEXT: msr apsr_nzcvq, lr +; CHECK-NEXT: bxns lr entry: %sub = add nsw i32 %x, -1 %call = tail call i32 @g(i32 %sub) %add = add nsw i32 %call, 1 ret i32 %add } -; CHECK-LABEL: f2: -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! 
-; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .cfi_offset ra_auth_code, -12 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; ... -; CHECK: add sp, #4 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r7, lr} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: mrs r12, control -; ... -; CHECK: bxns lr declare dso_local i32 @g(i32) local_unnamed_addr @@ -103,22 +137,22 @@ attributes #1 = { "sign-return-address"="non-leaf" "cmse_nonsecure_entry" "targe ; UNWIND-LABEL: FunctionAddress: 0x0 ; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0x80 0x08 ; pop {r7} ; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} -; UNWIND-NEXT: 0xB0 ; finish -; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} -; UNWIND-LABEL: FunctionAddress: 0x24 -; UNWIND: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} -; UNWIND-LABEL: FunctionAddress: 0x54 +; UNWIND-LABEL: FunctionAddress: 0x1E +; UNWIND: 0x80 0x08 ; pop {r7} +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} + +; UNWIND-LABEL: FunctionAddress: 0x48 ; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0x80 0x08 ; pop {r7} ; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} -; UNWIND-NEXT: 0xB0 ; finish -; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} ; UNWIND-LABEL: 00000001 {{.*}} f0 -; UNWIND-LABEL: 00000025 {{.*}} f1 -; UNWIND-LABEL: 00000055 {{.*}} f2 +; UNWIND-LABEL: 0000001f {{.*}} f1 +; UNWIND-LABEL: 00000049 {{.*}} f2 diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll index 8bcf87130c54008..7e9258002f61598 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll @@ -1,32 +1,87 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s 
--force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s +; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=none | FileCheck %s --check-prefix=R7 +; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=reserved | FileCheck %s --check-prefix=R7-RES +; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all | FileCheck %s --check-prefix=R7-ABI +; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=none -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11 +; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=reserved -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11-RES +; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11-ABI ; int test1() { ; return 0; ; } define i32 @test1() "sign-return-address"="non-leaf" { -; CHECK-LABEL: test1: -; CHECK: .cfi_sections .debug_frame -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: @ %bb.0: @ %entry -; CHECK-NEXT: pac r12, lr, sp -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! 
-; CHECK-NEXT: .cfi_def_cfa_offset 4 -; CHECK-NEXT: .cfi_offset ra_auth_code, -4 -; CHECK-NEXT: .save {r11, lr} -; CHECK-NEXT: push.w {r11, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .cfi_offset lr, -8 -; CHECK-NEXT: .cfi_offset r11, -12 -; CHECK-NEXT: .setfp r11, sp -; CHECK-NEXT: mov r11, sp -; CHECK-NEXT: .cfi_def_cfa_register r11 -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: pop.w {r11, lr} -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr +; R7-LABEL: test1: +; R7: .cfi_sections .debug_frame +; R7-NEXT: .cfi_startproc +; R7-NEXT: @ %bb.0: @ %entry +; R7-NEXT: movs r0, #0 +; R7-NEXT: bx lr +; +; R7-RES-LABEL: test1: +; R7-RES: .cfi_sections .debug_frame +; R7-RES-NEXT: .cfi_startproc +; R7-RES-NEXT: @ %bb.0: @ %entry +; R7-RES-NEXT: movs r0, #0 +; R7-RES-NEXT: bx lr +; +; R7-ABI-LABEL: test1: +; R7-ABI: .cfi_sections .debug_frame +; R7-ABI-NEXT: .cfi_startproc +; R7-ABI-NEXT: @ %bb.0: @ %entry +; R7-ABI-NEXT: pac r12, lr, sp +; R7-ABI-NEXT: .save {r7, lr} +; R7-ABI-NEXT: push {r7, lr} +; R7-ABI-NEXT: .cfi_def_cfa_offset 8 +; R7-ABI-NEXT: .cfi_offset lr, -4 +; R7-ABI-NEXT: .cfi_offset r7, -8 +; R7-ABI-NEXT: .setfp r7, sp +; R7-ABI-NEXT: mov r7, sp +; R7-ABI-NEXT: .cfi_def_cfa_register r7 +; R7-ABI-NEXT: .save {ra_auth_code} +; R7-ABI-NEXT: str r12, [sp, #-4]! 
+; R7-ABI-NEXT: .cfi_offset ra_auth_code, -12 +; R7-ABI-NEXT: movs r0, #0 +; R7-ABI-NEXT: ldr r12, [sp], #4 +; R7-ABI-NEXT: pop.w {r7, lr} +; R7-ABI-NEXT: aut r12, lr, sp +; R7-ABI-NEXT: bx lr +; +; R11-LABEL: test1: +; R11: .cfi_sections .debug_frame +; R11-NEXT: .cfi_startproc +; R11-NEXT: @ %bb.0: @ %entry +; R11-NEXT: movs r0, #0 +; R11-NEXT: bx lr +; +; R11-RES-LABEL: test1: +; R11-RES: .cfi_sections .debug_frame +; R11-RES-NEXT: .cfi_startproc +; R11-RES-NEXT: @ %bb.0: @ %entry +; R11-RES-NEXT: movs r0, #0 +; R11-RES-NEXT: bx lr +; +; R11-ABI-LABEL: test1: +; R11-ABI: .cfi_sections .debug_frame +; R11-ABI-NEXT: .cfi_startproc +; R11-ABI-NEXT: @ %bb.0: @ %entry +; R11-ABI-NEXT: pac r12, lr, sp +; R11-ABI-NEXT: .save {ra_auth_code} +; R11-ABI-NEXT: str r12, [sp, #-4]! +; R11-ABI-NEXT: .cfi_def_cfa_offset 4 +; R11-ABI-NEXT: .cfi_offset ra_auth_code, -4 +; R11-ABI-NEXT: .save {r11, lr} +; R11-ABI-NEXT: push.w {r11, lr} +; R11-ABI-NEXT: .cfi_def_cfa_offset 12 +; R11-ABI-NEXT: .cfi_offset lr, -8 +; R11-ABI-NEXT: .cfi_offset r11, -12 +; R11-ABI-NEXT: .setfp r11, sp +; R11-ABI-NEXT: mov r11, sp +; R11-ABI-NEXT: .cfi_def_cfa_register r11 +; R11-ABI-NEXT: movs r0, #0 +; R11-ABI-NEXT: pop.w {r11, lr} +; R11-ABI-NEXT: ldr r12, [sp], #4 +; R11-ABI-NEXT: aut r12, lr, sp +; R11-ABI-NEXT: bx lr entry: ret i32 0 } @@ -36,37 +91,191 @@ entry: ; bar(a); ; } define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" { -; CHECK-LABEL: test2: -; CHECK: .cfi_startproc -; CHECK-NEXT: @ %bb.0: @ %entry -; CHECK-NEXT: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r7, ra_auth_code} -; CHECK-NEXT: push.w {r4, r7, r12} -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .cfi_offset ra_auth_code, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .cfi_offset r4, -12 -; CHECK-NEXT: .save {r11, lr} -; CHECK-NEXT: push.w {r11, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset lr, -16 -; CHECK-NEXT: .cfi_offset r11, -20 -; CHECK-NEXT: .setfp r11, sp -; 
CHECK-NEXT: mov r11, sp -; CHECK-NEXT: .cfi_def_cfa_register r11 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: movs r1, #7 -; CHECK-NEXT: add.w r0, r1, r0, lsl #2 -; CHECK-NEXT: bic r0, r0, #7 -; CHECK-NEXT: sub.w r0, sp, r0 -; CHECK-NEXT: mov sp, r0 -; CHECK-NEXT: bl take_ptr -; CHECK-NEXT: mov sp, r11 -; CHECK-NEXT: pop.w {r11, lr} -; CHECK-NEXT: pop.w {r4, r7, r12} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr +; R7-LABEL: test2: +; R7: .cfi_startproc +; R7-NEXT: @ %bb.0: @ %entry +; R7-NEXT: pac r12, lr, sp +; R7-NEXT: .save {r4, r6, r7, ra_auth_code, lr} +; R7-NEXT: push.w {r4, r6, r7, r12, lr} +; R7-NEXT: .cfi_def_cfa_offset 20 +; R7-NEXT: .cfi_offset lr, -4 +; R7-NEXT: .cfi_offset ra_auth_code, -8 +; R7-NEXT: .cfi_offset r7, -12 +; R7-NEXT: .cfi_offset r6, -16 +; R7-NEXT: .cfi_offset r4, -20 +; R7-NEXT: .setfp r7, sp, #8 +; R7-NEXT: add r7, sp, #8 +; R7-NEXT: .cfi_def_cfa r7, 12 +; R7-NEXT: .pad #4 +; R7-NEXT: sub sp, #4 +; R7-NEXT: movs r1, #7 +; R7-NEXT: add.w r0, r1, r0, lsl #2 +; R7-NEXT: bic r0, r0, #7 +; R7-NEXT: sub.w r0, sp, r0 +; R7-NEXT: mov sp, r0 +; R7-NEXT: bl take_ptr +; R7-NEXT: sub.w r4, r7, #8 +; R7-NEXT: mov sp, r4 +; R7-NEXT: pop.w {r4, r6, r7, r12, lr} +; R7-NEXT: aut r12, lr, sp +; R7-NEXT: bx lr +; +; R7-RES-LABEL: test2: +; R7-RES: .cfi_startproc +; R7-RES-NEXT: @ %bb.0: @ %entry +; R7-RES-NEXT: pac r12, lr, sp +; R7-RES-NEXT: .save {r4, r6, r7, lr} +; R7-RES-NEXT: push {r4, r6, r7, lr} +; R7-RES-NEXT: .cfi_def_cfa_offset 16 +; R7-RES-NEXT: .cfi_offset lr, -4 +; R7-RES-NEXT: .cfi_offset r7, -8 +; R7-RES-NEXT: .cfi_offset r6, -12 +; R7-RES-NEXT: .cfi_offset r4, -16 +; R7-RES-NEXT: .setfp r7, sp, #8 +; R7-RES-NEXT: add r7, sp, #8 +; R7-RES-NEXT: .cfi_def_cfa r7, 8 +; R7-RES-NEXT: .save {ra_auth_code} +; R7-RES-NEXT: str r12, [sp, #-4]! 
+; R7-RES-NEXT: .cfi_offset ra_auth_code, -20 +; R7-RES-NEXT: .pad #4 +; R7-RES-NEXT: sub sp, #4 +; R7-RES-NEXT: movs r1, #7 +; R7-RES-NEXT: add.w r0, r1, r0, lsl #2 +; R7-RES-NEXT: bic r0, r0, #7 +; R7-RES-NEXT: sub.w r0, sp, r0 +; R7-RES-NEXT: mov sp, r0 +; R7-RES-NEXT: bl take_ptr +; R7-RES-NEXT: sub.w r4, r7, #12 +; R7-RES-NEXT: mov sp, r4 +; R7-RES-NEXT: ldr r12, [sp], #4 +; R7-RES-NEXT: pop.w {r4, r6, r7, lr} +; R7-RES-NEXT: aut r12, lr, sp +; R7-RES-NEXT: bx lr +; +; R7-ABI-LABEL: test2: +; R7-ABI: .cfi_startproc +; R7-ABI-NEXT: @ %bb.0: @ %entry +; R7-ABI-NEXT: pac r12, lr, sp +; R7-ABI-NEXT: .save {r4, r6, r7, lr} +; R7-ABI-NEXT: push {r4, r6, r7, lr} +; R7-ABI-NEXT: .cfi_def_cfa_offset 16 +; R7-ABI-NEXT: .cfi_offset lr, -4 +; R7-ABI-NEXT: .cfi_offset r7, -8 +; R7-ABI-NEXT: .cfi_offset r6, -12 +; R7-ABI-NEXT: .cfi_offset r4, -16 +; R7-ABI-NEXT: .setfp r7, sp, #8 +; R7-ABI-NEXT: add r7, sp, #8 +; R7-ABI-NEXT: .cfi_def_cfa r7, 8 +; R7-ABI-NEXT: .save {ra_auth_code} +; R7-ABI-NEXT: str r12, [sp, #-4]! 
+; R7-ABI-NEXT: .cfi_offset ra_auth_code, -20 +; R7-ABI-NEXT: .pad #4 +; R7-ABI-NEXT: sub sp, #4 +; R7-ABI-NEXT: movs r1, #7 +; R7-ABI-NEXT: add.w r0, r1, r0, lsl #2 +; R7-ABI-NEXT: bic r0, r0, #7 +; R7-ABI-NEXT: sub.w r0, sp, r0 +; R7-ABI-NEXT: mov sp, r0 +; R7-ABI-NEXT: bl take_ptr +; R7-ABI-NEXT: sub.w r4, r7, #12 +; R7-ABI-NEXT: mov sp, r4 +; R7-ABI-NEXT: ldr r12, [sp], #4 +; R7-ABI-NEXT: pop.w {r4, r6, r7, lr} +; R7-ABI-NEXT: aut r12, lr, sp +; R7-ABI-NEXT: bx lr +; +; R11-LABEL: test2: +; R11: .cfi_startproc +; R11-NEXT: @ %bb.0: @ %entry +; R11-NEXT: pac r12, lr, sp +; R11-NEXT: .save {r4, r7, r11, ra_auth_code, lr} +; R11-NEXT: push.w {r4, r7, r11, r12, lr} +; R11-NEXT: .cfi_def_cfa_offset 20 +; R11-NEXT: .cfi_offset lr, -4 +; R11-NEXT: .cfi_offset ra_auth_code, -8 +; R11-NEXT: .cfi_offset r11, -12 +; R11-NEXT: .cfi_offset r7, -16 +; R11-NEXT: .cfi_offset r4, -20 +; R11-NEXT: .setfp r11, sp, #8 +; R11-NEXT: add.w r11, sp, #8 +; R11-NEXT: .cfi_def_cfa r11, 12 +; R11-NEXT: .pad #4 +; R11-NEXT: sub sp, #4 +; R11-NEXT: movs r1, #7 +; R11-NEXT: add.w r0, r1, r0, lsl #2 +; R11-NEXT: bic r0, r0, #7 +; R11-NEXT: sub.w r0, sp, r0 +; R11-NEXT: mov sp, r0 +; R11-NEXT: bl take_ptr +; R11-NEXT: sub.w r4, r11, #8 +; R11-NEXT: mov sp, r4 +; R11-NEXT: pop.w {r4, r7, r11, r12, lr} +; R11-NEXT: aut r12, lr, sp +; R11-NEXT: bx lr +; +; R11-RES-LABEL: test2: +; R11-RES: .cfi_startproc +; R11-RES-NEXT: @ %bb.0: @ %entry +; R11-RES-NEXT: pac r12, lr, sp +; R11-RES-NEXT: .save {r4, r7, ra_auth_code} +; R11-RES-NEXT: push.w {r4, r7, r12} +; R11-RES-NEXT: .cfi_def_cfa_offset 12 +; R11-RES-NEXT: .cfi_offset ra_auth_code, -4 +; R11-RES-NEXT: .cfi_offset r7, -8 +; R11-RES-NEXT: .cfi_offset r4, -12 +; R11-RES-NEXT: .save {r11, lr} +; R11-RES-NEXT: push.w {r11, lr} +; R11-RES-NEXT: .cfi_def_cfa_offset 20 +; R11-RES-NEXT: .cfi_offset lr, -16 +; R11-RES-NEXT: .cfi_offset r11, -20 +; R11-RES-NEXT: .setfp r11, sp +; R11-RES-NEXT: mov r11, sp +; R11-RES-NEXT: .cfi_def_cfa_register r11 +; 
R11-RES-NEXT: .pad #4 +; R11-RES-NEXT: sub sp, #4 +; R11-RES-NEXT: movs r1, #7 +; R11-RES-NEXT: add.w r0, r1, r0, lsl #2 +; R11-RES-NEXT: bic r0, r0, #7 +; R11-RES-NEXT: sub.w r0, sp, r0 +; R11-RES-NEXT: mov sp, r0 +; R11-RES-NEXT: bl take_ptr +; R11-RES-NEXT: mov sp, r11 +; R11-RES-NEXT: pop.w {r11, lr} +; R11-RES-NEXT: pop.w {r4, r7, r12} +; R11-RES-NEXT: aut r12, lr, sp +; R11-RES-NEXT: bx lr +; +; R11-ABI-LABEL: test2: +; R11-ABI: .cfi_startproc +; R11-ABI-NEXT: @ %bb.0: @ %entry +; R11-ABI-NEXT: pac r12, lr, sp +; R11-ABI-NEXT: .save {r4, r7, ra_auth_code} +; R11-ABI-NEXT: push.w {r4, r7, r12} +; R11-ABI-NEXT: .cfi_def_cfa_offset 12 +; R11-ABI-NEXT: .cfi_offset ra_auth_code, -4 +; R11-ABI-NEXT: .cfi_offset r7, -8 +; R11-ABI-NEXT: .cfi_offset r4, -12 +; R11-ABI-NEXT: .save {r11, lr} +; R11-ABI-NEXT: push.w {r11, lr} +; R11-ABI-NEXT: .cfi_def_cfa_offset 20 +; R11-ABI-NEXT: .cfi_offset lr, -16 +; R11-ABI-NEXT: .cfi_offset r11, -20 +; R11-ABI-NEXT: .setfp r11, sp +; R11-ABI-NEXT: mov r11, sp +; R11-ABI-NEXT: .cfi_def_cfa_register r11 +; R11-ABI-NEXT: .pad #4 +; R11-ABI-NEXT: sub sp, #4 +; R11-ABI-NEXT: movs r1, #7 +; R11-ABI-NEXT: add.w r0, r1, r0, lsl #2 +; R11-ABI-NEXT: bic r0, r0, #7 +; R11-ABI-NEXT: sub.w r0, sp, r0 +; R11-ABI-NEXT: mov sp, r0 +; R11-ABI-NEXT: bl take_ptr +; R11-ABI-NEXT: mov sp, r11 +; R11-ABI-NEXT: pop.w {r11, lr} +; R11-ABI-NEXT: pop.w {r4, r7, r12} +; R11-ABI-NEXT: aut r12, lr, sp +; R11-ABI-NEXT: bx lr entry: %vla = alloca i32, i32 %n, align 4 call void @take_ptr(ptr noundef nonnull %vla) @@ -81,49 +290,263 @@ entry: ; knr(); ; } define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-return-address"="non-leaf" { -; CHECK-LABEL: test3: -; CHECK: .cfi_startproc -; CHECK-NEXT: @ %bb.0: @ %entry -; CHECK-NEXT: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r5, r6, r7, ra_auth_code} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r12} -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset ra_auth_code, -4 -; CHECK-NEXT: 
.cfi_offset r7, -8 -; CHECK-NEXT: .cfi_offset r6, -12 -; CHECK-NEXT: .cfi_offset r5, -16 -; CHECK-NEXT: .cfi_offset r4, -20 -; CHECK-NEXT: .save {r11, lr} -; CHECK-NEXT: push.w {r11, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 28 -; CHECK-NEXT: .cfi_offset lr, -24 -; CHECK-NEXT: .cfi_offset r11, -28 -; CHECK-NEXT: .setfp r11, sp -; CHECK-NEXT: mov r11, sp -; CHECK-NEXT: .cfi_def_cfa_register r11 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: it ne -; CHECK-NEXT: blne knr -; CHECK-NEXT: adds r0, r5, #7 -; CHECK-NEXT: bic r0, r0, #7 -; CHECK-NEXT: sub.w r0, sp, r0 -; CHECK-NEXT: mov sp, r0 -; CHECK-NEXT: bl take_ptr -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: movs r1, #0 -; CHECK-NEXT: bl __aeabi_fcmpeq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bleq knr -; CHECK-NEXT: mov sp, r11 -; CHECK-NEXT: pop.w {r11, lr} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r12} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr +; R7-LABEL: test3: +; R7: .cfi_startproc +; R7-NEXT: @ %bb.0: @ %entry +; R7-NEXT: pac r12, lr, sp +; R7-NEXT: .save {r4, r5, r6, r7, r8, ra_auth_code, lr} +; R7-NEXT: push.w {r4, r5, r6, r7, r8, r12, lr} +; R7-NEXT: .cfi_def_cfa_offset 28 +; R7-NEXT: .cfi_offset lr, -4 +; R7-NEXT: .cfi_offset ra_auth_code, -8 +; R7-NEXT: .cfi_offset r8, -12 +; R7-NEXT: .cfi_offset r7, -16 +; R7-NEXT: .cfi_offset r6, -20 +; R7-NEXT: .cfi_offset r5, -24 +; R7-NEXT: .cfi_offset r4, -28 +; R7-NEXT: .setfp r7, sp, #12 +; R7-NEXT: add r7, sp, #12 +; R7-NEXT: .cfi_def_cfa r7, 16 +; R7-NEXT: .pad #4 +; R7-NEXT: sub sp, #4 +; R7-NEXT: cmp r0, #0 +; R7-NEXT: mov r5, r2 +; R7-NEXT: mov r4, r1 +; R7-NEXT: it ne +; R7-NEXT: blne knr +; R7-NEXT: adds r0, r5, #7 +; R7-NEXT: bic r0, r0, #7 +; R7-NEXT: sub.w r0, sp, r0 +; R7-NEXT: mov sp, r0 +; R7-NEXT: bl take_ptr +; R7-NEXT: mov r0, r4 +; R7-NEXT: movs r1, #0 +; R7-NEXT: bl __aeabi_fcmpeq +; R7-NEXT: cmp r0, #0 +; R7-NEXT: it eq +; R7-NEXT: 
bleq knr +; R7-NEXT: sub.w r4, r7, #12 +; R7-NEXT: mov sp, r4 +; R7-NEXT: pop.w {r4, r5, r6, r7, r8, r12, lr} +; R7-NEXT: aut r12, lr, sp +; R7-NEXT: bx lr +; +; R7-RES-LABEL: test3: +; R7-RES: .cfi_startproc +; R7-RES-NEXT: @ %bb.0: @ %entry +; R7-RES-NEXT: pac r12, lr, sp +; R7-RES-NEXT: .save {r4, r5, r6, r7, lr} +; R7-RES-NEXT: push {r4, r5, r6, r7, lr} +; R7-RES-NEXT: .cfi_def_cfa_offset 20 +; R7-RES-NEXT: .cfi_offset lr, -4 +; R7-RES-NEXT: .cfi_offset r7, -8 +; R7-RES-NEXT: .cfi_offset r6, -12 +; R7-RES-NEXT: .cfi_offset r5, -16 +; R7-RES-NEXT: .cfi_offset r4, -20 +; R7-RES-NEXT: .setfp r7, sp, #12 +; R7-RES-NEXT: add r7, sp, #12 +; R7-RES-NEXT: .cfi_def_cfa r7, 8 +; R7-RES-NEXT: .save {r8, ra_auth_code} +; R7-RES-NEXT: push.w {r8, r12} +; R7-RES-NEXT: .cfi_offset ra_auth_code, -24 +; R7-RES-NEXT: .cfi_offset r8, -28 +; R7-RES-NEXT: .pad #4 +; R7-RES-NEXT: sub sp, #4 +; R7-RES-NEXT: cmp r0, #0 +; R7-RES-NEXT: mov r5, r2 +; R7-RES-NEXT: mov r4, r1 +; R7-RES-NEXT: it ne +; R7-RES-NEXT: blne knr +; R7-RES-NEXT: adds r0, r5, #7 +; R7-RES-NEXT: bic r0, r0, #7 +; R7-RES-NEXT: sub.w r0, sp, r0 +; R7-RES-NEXT: mov sp, r0 +; R7-RES-NEXT: bl take_ptr +; R7-RES-NEXT: mov r0, r4 +; R7-RES-NEXT: movs r1, #0 +; R7-RES-NEXT: bl __aeabi_fcmpeq +; R7-RES-NEXT: cmp r0, #0 +; R7-RES-NEXT: it eq +; R7-RES-NEXT: bleq knr +; R7-RES-NEXT: sub.w r4, r7, #20 +; R7-RES-NEXT: mov sp, r4 +; R7-RES-NEXT: pop.w {r8, r12} +; R7-RES-NEXT: pop.w {r4, r5, r6, r7, lr} +; R7-RES-NEXT: aut r12, lr, sp +; R7-RES-NEXT: bx lr +; +; R7-ABI-LABEL: test3: +; R7-ABI: .cfi_startproc +; R7-ABI-NEXT: @ %bb.0: @ %entry +; R7-ABI-NEXT: pac r12, lr, sp +; R7-ABI-NEXT: .save {r4, r5, r6, r7, lr} +; R7-ABI-NEXT: push {r4, r5, r6, r7, lr} +; R7-ABI-NEXT: .cfi_def_cfa_offset 20 +; R7-ABI-NEXT: .cfi_offset lr, -4 +; R7-ABI-NEXT: .cfi_offset r7, -8 +; R7-ABI-NEXT: .cfi_offset r6, -12 +; R7-ABI-NEXT: .cfi_offset r5, -16 +; R7-ABI-NEXT: .cfi_offset r4, -20 +; R7-ABI-NEXT: .setfp r7, sp, #12 +; R7-ABI-NEXT: add r7, 
sp, #12 +; R7-ABI-NEXT: .cfi_def_cfa r7, 8 +; R7-ABI-NEXT: .save {r8, ra_auth_code} +; R7-ABI-NEXT: push.w {r8, r12} +; R7-ABI-NEXT: .cfi_offset ra_auth_code, -24 +; R7-ABI-NEXT: .cfi_offset r8, -28 +; R7-ABI-NEXT: .pad #4 +; R7-ABI-NEXT: sub sp, #4 +; R7-ABI-NEXT: cmp r0, #0 +; R7-ABI-NEXT: mov r5, r2 +; R7-ABI-NEXT: mov r4, r1 +; R7-ABI-NEXT: it ne +; R7-ABI-NEXT: blne knr +; R7-ABI-NEXT: adds r0, r5, #7 +; R7-ABI-NEXT: bic r0, r0, #7 +; R7-ABI-NEXT: sub.w r0, sp, r0 +; R7-ABI-NEXT: mov sp, r0 +; R7-ABI-NEXT: bl take_ptr +; R7-ABI-NEXT: mov r0, r4 +; R7-ABI-NEXT: movs r1, #0 +; R7-ABI-NEXT: bl __aeabi_fcmpeq +; R7-ABI-NEXT: cmp r0, #0 +; R7-ABI-NEXT: it eq +; R7-ABI-NEXT: bleq knr +; R7-ABI-NEXT: sub.w r4, r7, #20 +; R7-ABI-NEXT: mov sp, r4 +; R7-ABI-NEXT: pop.w {r8, r12} +; R7-ABI-NEXT: pop.w {r4, r5, r6, r7, lr} +; R7-ABI-NEXT: aut r12, lr, sp +; R7-ABI-NEXT: bx lr +; +; R11-LABEL: test3: +; R11: .cfi_startproc +; R11-NEXT: @ %bb.0: @ %entry +; R11-NEXT: pac r12, lr, sp +; R11-NEXT: .save {r4, r5, r6, r7, r11, ra_auth_code, lr} +; R11-NEXT: push.w {r4, r5, r6, r7, r11, r12, lr} +; R11-NEXT: .cfi_def_cfa_offset 28 +; R11-NEXT: .cfi_offset lr, -4 +; R11-NEXT: .cfi_offset ra_auth_code, -8 +; R11-NEXT: .cfi_offset r11, -12 +; R11-NEXT: .cfi_offset r7, -16 +; R11-NEXT: .cfi_offset r6, -20 +; R11-NEXT: .cfi_offset r5, -24 +; R11-NEXT: .cfi_offset r4, -28 +; R11-NEXT: .setfp r11, sp, #16 +; R11-NEXT: add.w r11, sp, #16 +; R11-NEXT: .cfi_def_cfa r11, 12 +; R11-NEXT: .pad #4 +; R11-NEXT: sub sp, #4 +; R11-NEXT: cmp r0, #0 +; R11-NEXT: mov r5, r2 +; R11-NEXT: mov r4, r1 +; R11-NEXT: it ne +; R11-NEXT: blne knr +; R11-NEXT: adds r0, r5, #7 +; R11-NEXT: bic r0, r0, #7 +; R11-NEXT: sub.w r0, sp, r0 +; R11-NEXT: mov sp, r0 +; R11-NEXT: bl take_ptr +; R11-NEXT: mov r0, r4 +; R11-NEXT: movs r1, #0 +; R11-NEXT: bl __aeabi_fcmpeq +; R11-NEXT: cmp r0, #0 +; R11-NEXT: it eq +; R11-NEXT: bleq knr +; R11-NEXT: sub.w r4, r11, #16 +; R11-NEXT: mov sp, r4 +; R11-NEXT: pop.w {r4, r5, 
r6, r7, r11, r12, lr} +; R11-NEXT: aut r12, lr, sp +; R11-NEXT: bx lr +; +; R11-RES-LABEL: test3: +; R11-RES: .cfi_startproc +; R11-RES-NEXT: @ %bb.0: @ %entry +; R11-RES-NEXT: pac r12, lr, sp +; R11-RES-NEXT: .save {r4, r5, r6, r7, ra_auth_code} +; R11-RES-NEXT: push.w {r4, r5, r6, r7, r12} +; R11-RES-NEXT: .cfi_def_cfa_offset 20 +; R11-RES-NEXT: .cfi_offset ra_auth_code, -4 +; R11-RES-NEXT: .cfi_offset r7, -8 +; R11-RES-NEXT: .cfi_offset r6, -12 +; R11-RES-NEXT: .cfi_offset r5, -16 +; R11-RES-NEXT: .cfi_offset r4, -20 +; R11-RES-NEXT: .save {r11, lr} +; R11-RES-NEXT: push.w {r11, lr} +; R11-RES-NEXT: .cfi_def_cfa_offset 28 +; R11-RES-NEXT: .cfi_offset lr, -24 +; R11-RES-NEXT: .cfi_offset r11, -28 +; R11-RES-NEXT: .setfp r11, sp +; R11-RES-NEXT: mov r11, sp +; R11-RES-NEXT: .cfi_def_cfa_register r11 +; R11-RES-NEXT: .pad #4 +; R11-RES-NEXT: sub sp, #4 +; R11-RES-NEXT: cmp r0, #0 +; R11-RES-NEXT: mov r5, r2 +; R11-RES-NEXT: mov r4, r1 +; R11-RES-NEXT: it ne +; R11-RES-NEXT: blne knr +; R11-RES-NEXT: adds r0, r5, #7 +; R11-RES-NEXT: bic r0, r0, #7 +; R11-RES-NEXT: sub.w r0, sp, r0 +; R11-RES-NEXT: mov sp, r0 +; R11-RES-NEXT: bl take_ptr +; R11-RES-NEXT: mov r0, r4 +; R11-RES-NEXT: movs r1, #0 +; R11-RES-NEXT: bl __aeabi_fcmpeq +; R11-RES-NEXT: cmp r0, #0 +; R11-RES-NEXT: it eq +; R11-RES-NEXT: bleq knr +; R11-RES-NEXT: mov sp, r11 +; R11-RES-NEXT: pop.w {r11, lr} +; R11-RES-NEXT: pop.w {r4, r5, r6, r7, r12} +; R11-RES-NEXT: aut r12, lr, sp +; R11-RES-NEXT: bx lr +; +; R11-ABI-LABEL: test3: +; R11-ABI: .cfi_startproc +; R11-ABI-NEXT: @ %bb.0: @ %entry +; R11-ABI-NEXT: pac r12, lr, sp +; R11-ABI-NEXT: .save {r4, r5, r6, r7, ra_auth_code} +; R11-ABI-NEXT: push.w {r4, r5, r6, r7, r12} +; R11-ABI-NEXT: .cfi_def_cfa_offset 20 +; R11-ABI-NEXT: .cfi_offset ra_auth_code, -4 +; R11-ABI-NEXT: .cfi_offset r7, -8 +; R11-ABI-NEXT: .cfi_offset r6, -12 +; R11-ABI-NEXT: .cfi_offset r5, -16 +; R11-ABI-NEXT: .cfi_offset r4, -20 +; R11-ABI-NEXT: .save {r11, lr} +; R11-ABI-NEXT: push.w 
{r11, lr} +; R11-ABI-NEXT: .cfi_def_cfa_offset 28 +; R11-ABI-NEXT: .cfi_offset lr, -24 +; R11-ABI-NEXT: .cfi_offset r11, -28 +; R11-ABI-NEXT: .setfp r11, sp +; R11-ABI-NEXT: mov r11, sp +; R11-ABI-NEXT: .cfi_def_cfa_register r11 +; R11-ABI-NEXT: .pad #4 +; R11-ABI-NEXT: sub sp, #4 +; R11-ABI-NEXT: cmp r0, #0 +; R11-ABI-NEXT: mov r5, r2 +; R11-ABI-NEXT: mov r4, r1 +; R11-ABI-NEXT: it ne +; R11-ABI-NEXT: blne knr +; R11-ABI-NEXT: adds r0, r5, #7 +; R11-ABI-NEXT: bic r0, r0, #7 +; R11-ABI-NEXT: sub.w r0, sp, r0 +; R11-ABI-NEXT: mov sp, r0 +; R11-ABI-NEXT: bl take_ptr +; R11-ABI-NEXT: mov r0, r4 +; R11-ABI-NEXT: movs r1, #0 +; R11-ABI-NEXT: bl __aeabi_fcmpeq +; R11-ABI-NEXT: cmp r0, #0 +; R11-ABI-NEXT: it eq +; R11-ABI-NEXT: bleq knr +; R11-ABI-NEXT: mov sp, r11 +; R11-ABI-NEXT: pop.w {r11, lr} +; R11-ABI-NEXT: pop.w {r4, r5, r6, r7, r12} +; R11-ABI-NEXT: aut r12, lr, sp +; R11-ABI-NEXT: bx lr entry: %tobool.not = icmp eq i32 %c, 0 br i1 %tobool.not, label %if.end, label %if.then diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll index 4dfac252e2314ce..615af15e8b5679f 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll @@ -1,11 +1,30 @@ -; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK1 -; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK2 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc %s -o - | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-unknown-eabi" @p = hidden local_unnamed_addr global ptr null, align 4 define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +; CHECK-LABEL: f: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r12, lr} +; 
CHECK-NEXT: mov r7, r3 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: bl g +; CHECK-NEXT: movw r1, :lower16:p +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: movt r1, :upper16:p +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: ldr r4, [r1] +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: blx r4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %call = tail call i32 @g(i32 %a) #0 %0 = load ptr, ptr @p, align 4 @@ -13,19 +32,6 @@ entry: ret i32 %call1 } -; CHECK1-LABEL: f -; ... -; CHECK1: aut r12, lr, sp -; CHECK1-NOT: bx r12 - -; CHECK2-LABEL: f -; ... -; CHECK2: blx r4 -; CHECK2-NEXT: ldr r12, [sp], #4 -; CHECK2-NEXT: pop.w {r4, r5, r6, r7, lr} -; CHECK2-NEXT: aut r12, lr, sp -; CHECK2-NEXT: bx lr - declare dso_local i32 @g(i32) local_unnamed_addr #0 attributes #0 = { nounwind "sign-return-address"="non-leaf"} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll index 1b13e06546f152b..d02d4b51d73b53e 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -21,12 +22,43 @@ target triple = "thumbv7m-arm-none-eabi" ; } define hidden i32 @h(i32 %a, i32 %b) local_unnamed_addr #0 { +; CHECK-LABEL: h: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: bx lr entry: %add = add nsw i32 %b, %a ret i32 %add } define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +; CHECK-LABEL: f: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; 
CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, ra_auth_code, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: push.w {r3, r4, r5, r6, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r5, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bmi .LBB1_2 +; CHECK-NEXT: @ %bb.1: @ %if.end +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: b .LBB1_3 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: .LBB1_3: @ %return +; CHECK-NEXT: pop.w {r3, r4, r5, r6, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %cmp = icmp slt i32 %a, 0 br i1 %cmp, label %return, label %if.end @@ -48,34 +80,32 @@ return: ; preds = %entry, %if.end ret i32 %retval.0 } -; CHECK-LABEL: f: -; ... -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r6, -8 -; CHECK-NEXT: .cfi_offset r5, -12 -; CHECK-NEXT: .cfi_offset r4, -16 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset ra_auth_code, -20 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; ... -; CHECK: bl OUTLINED_FUNCTION_0 -; ... 
-; CHECK: add sp, #4 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r4, r5, r6, lr} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr - - define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +; CHECK-LABEL: g: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, ra_auth_code, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: push.w {r3, r4, r5, r6, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r5, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bmi .LBB2_2 +; CHECK-NEXT: @ %bb.1: @ %if.end +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: b .LBB2_3 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: .LBB2_3: @ %return +; CHECK-NEXT: pop.w {r3, r4, r5, r6, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %cmp = icmp slt i32 %a, 0 br i1 %cmp, label %return, label %if.end @@ -96,30 +126,6 @@ return: ; preds = %entry, %if.end %retval.0 = phi i32 [ %add3, %if.end ], [ -1, %entry ] ret i32 %retval.0 } -; CHECK-LABEL: g: -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r6, -8 -; CHECK-NEXT: .cfi_offset r5, -12 -; CHECK-NEXT: .cfi_offset r4, -16 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset ra_auth_code, -20 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; ... -; CHECK: bl OUTLINED_FUNCTION_0 -; ... 
-; CHECK: add sp, #4 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r4, r5, r6, lr} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr ; CHECK-LABEL: OUTLINED_FUNCTION_0: ; CHECK: pac r12, lr, sp @@ -147,18 +153,20 @@ attributes #0 = { minsize noinline norecurse nounwind optsize readnone uwtable " ; UNWIND-LABEL: FunctionAddress: 0x4 ; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xA2 ; pop {r4, r5, r6} ; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} -; UNWIND-LABEL: FunctionAddress: 0x30 +; UNWIND-LABEL: FunctionAddress: 0x26 ; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xA2 ; pop {r4, r5, r6} ; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} -; UNWIND-LABEL: FunctionAddress: 0x5C +; UNWIND-LABEL: FunctionAddress: 0x48 ; UNWIND: 0xB4 ; pop ra_auth_code -; UNWIND: 0x84 0x00 ; pop {lr} - -; UNWIND-LABEL: 0000005d {{.*}} OUTLINED_FUNCTION_0 +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} + +; UNWIND-LABEL: 00000049 {{.*}} OUTLINED_FUNCTION_0 ; UNWIND-LABEL: 00000005 {{.*}} f -; UNWIND-LABEL: 00000031 {{.*}} g +; UNWIND-LABEL: 00000027 {{.*}} g diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll index 38c23977b623f9d..8777d517c4badcb 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -27,6 +28,37 @@ target triple = "thumbv7m-arm-none-eabi" @_ZTIi = external dso_local constant ptr define hidden i32 @_Z1hii(i32 %a, i32 %b) local_unnamed_addr #0 { +; 
CHECK-LABEL: _Z1hii: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r7, ra_auth_code, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: push.w {r6, r7, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r7, -12 +; CHECK-NEXT: cmp.w r0, #-1 +; CHECK-NEXT: ble .LBB0_2 +; CHECK-NEXT: @ %bb.1: @ %if.end +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: pop.w {r3, r7, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_2: @ %if.then +; CHECK-NEXT: movs r0, #4 +; CHECK-NEXT: bl __cxa_allocate_exception +; CHECK-NEXT: movs r1, #1 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: str r1, [r0] +; CHECK-NEXT: ldr r1, .LCPI0_0 +; CHECK-NEXT: bl __cxa_throw +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long _ZTIi entry: %cmp = icmp slt i32 %a, 0 br i1 %cmp, label %if.then, label %if.end @@ -42,31 +74,40 @@ if.end: ; preds = %entry ret i32 %add } -; CHECK-LABEL: _Z1hii: -; ... -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .cfi_offset ra_auth_code, -12 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; ... 
-; CHECK-NOT: pac -; CHECK: aut -; CHECK: .cfi_endproc - declare dso_local ptr @__cxa_allocate_exception(i32) local_unnamed_addr declare dso_local void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +; CHECK-LABEL: _Z1fiiii: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, ra_auth_code, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: push.w {r3, r4, r5, r6, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r5, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bmi .LBB1_2 +; CHECK-NEXT: @ %bb.1: @ %if.end +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: adds r1, r0, r6 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: adds r1, r4, r5 +; CHECK-NEXT: sdiv r0, r0, r1 +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: b .LBB1_3 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: .LBB1_3: @ %return +; CHECK-NEXT: pop.w {r3, r4, r5, r6, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %cmp = icmp slt i32 %a, 0 br i1 %cmp, label %return, label %if.end @@ -85,35 +126,36 @@ return: ; preds = %entry, %if.end ret i32 %retval.0 } -; CHECK-LABEL: _Z1fiiii: -; ... -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r6, -8 -; CHECK-NEXT: .cfi_offset r5, -12 -; CHECK-NEXT: .cfi_offset r4, -16 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset ra_auth_code, -20 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; ... -; CHECK: bl OUTLINED_FUNCTION_0 -; ... 
-; CHECK: add sp, #4 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r4, r5, r6, lr} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr - - - define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +; CHECK-LABEL: _Z1giiii: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, ra_auth_code, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: push.w {r3, r4, r5, r6, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r5, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: bmi .LBB2_2 +; CHECK-NEXT: @ %bb.1: @ %if.end +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: adds r1, r0, r6 +; CHECK-NEXT: muls r0, r1, r0 +; CHECK-NEXT: adds r1, r4, r5 +; CHECK-NEXT: sdiv r0, r0, r1 +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: b .LBB2_3 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: mov.w r0, #-1 +; CHECK-NEXT: .LBB2_3: @ %return +; CHECK-NEXT: pop.w {r3, r4, r5, r6, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %cmp = icmp slt i32 %a, 0 br i1 %cmp, label %return, label %if.end @@ -132,33 +174,6 @@ return: ; preds = %entry, %if.end ret i32 %retval.0 } -; CHECK-LABEL: _Z1giiii: -; ... -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r6, -8 -; CHECK-NEXT: .cfi_offset r5, -12 -; CHECK-NEXT: .cfi_offset r4, -16 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset ra_auth_code, -20 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; ... -; CHECK: bl OUTLINED_FUNCTION_0 -; ... 
-; CHECK: add sp, #4 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r4, r5, r6, lr} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr - - ; CHEK-LABEL: OUTLINED_FUNCTION_0: ; CHECK-NOT: pac ; CHECK-NOT: aut @@ -177,32 +192,31 @@ attributes #2 = { noreturn "sign-return-address"="non-leaf" } ; UNWIND-LABEL: FunctionAddress: 0x0 -; UNWIND: Opcodes +; UNWIND: Opcodes [ ; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0x80 0x08 ; pop {r7} ; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} -; UNWIND-NEXT: 0xB0 ; finish -; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} -; UNWIND-LABEL: FunctionAddress: 0x3C -; UNWIND: Opcodes +; UNWIND-LABEL: FunctionAddress: 0x30 +; UNWIND: Opcodes [ ; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xA2 ; pop {r4, r5, r6} ; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} -; UNWIND-LABEL: FunctionAddress: 0x72 -; UNWIND: Opcodes +; UNWIND-LABEL: FunctionAddress: 0x5C +; UNWIND: Opcodes [ ; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xA2 ; pop {r4, r5, r6} ; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} -; UNWIND-LABEL: FunctionAddress: 0xA8 -; UNWIND: Opcodes -; UNWIND-NEXT: 0xB0 ; finish -; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-LABEL: FunctionAddress: 0x88 +; UNWIND: Opcodes [ ; UNWIND-NEXT: 0xB0 ; finish -; UNWIND: 000000a9 {{.*}} OUTLINED_FUNCTION_0 +; UNWIND: 00000089 {{.*}} OUTLINED_FUNCTION_0 ; UWNIND: 00000001 {{.*}} _Z1hii -; UWNIND: 0000003d {{.*}} _Z1fiiii -; UWNIND: 00000073 {{.*}} _Z1giiii +; UWNIND: 00000031 {{.*}} _Z1fiiii +; UWNIND: 0000005d {{.*}} _Z1giiii diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll index 5dce6752c065e1c..5354303a034d4ed 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll @@ 
-1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s ; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -15,6 +16,39 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; } define hidden i32 @_Z1fv() local_unnamed_addr "sign-return-address"="non-leaf" { +; CHECK-LABEL: _Z1fv: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r6, r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r4, r6, r7, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r7, -12 +; CHECK-NEXT: .cfi_offset r6, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: .setfp r7, sp, #8 +; CHECK-NEXT: add r7, sp, #8 +; CHECK-NEXT: .cfi_def_cfa r7, 12 +; CHECK-NEXT: .pad #44 +; CHECK-NEXT: sub sp, #44 +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: bfc r4, #0, #5 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: mov r1, sp +; CHECK-NEXT: movs r0, #4 +; CHECK-NEXT: bl _Z1giPi +; CHECK-NEXT: ldm.w sp, {r0, r1, r2, r3} +; CHECK-NEXT: sub.w r4, r7, #8 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: add r0, r2 +; CHECK-NEXT: add r0, r3 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: pop.w {r4, r6, r7, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %a = alloca [4 x i32], align 32 %call = call i32 @_Z1giPi(i32 4, ptr nonnull %a) @@ -31,29 +65,6 @@ entry: ret i32 %add.3 } -; CHECK-LABEL: _Z1fv: -; CHECK: pac r12, lr, sp -; CHECK: .save {r4, r6, r7, lr} -; CHECK-NEXT: push {r4, r6, r7, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .cfi_offset r6, -12 -; CHECK-NEXT: .cfi_offset r4, -16 -; CHECK-NEXT: 
.setfp r7, sp, #8 -; CHECK-NEXT: add r7, sp, #8 -; CHECK-NEXT: .cfi_def_cfa r7, 8 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_offset ra_auth_code, -20 -; CHECK-NEXT: .pad #44 -; CHECK-NEXT: sub sp, #44 -; CHECK: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r4, r6, r7, lr} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr - - declare dso_local i32 @_Z1giPi(i32, ptr) local_unnamed_addr !llvm.module.flags = !{!0, !1, !2} @@ -64,6 +75,7 @@ declare dso_local i32 @_Z1giPi(i32, ptr) local_unnamed_addr ; UNWIND-LABEL: FunctionAddress: 0x0 ; UNWIND: 0x97 ; vsp = r7 -; UNWIND: 0x42 ; vsp = vsp - 12 -; UNWIND: 0xB4 ; pop ra_auth_code -; UNWIND: 0x84 0x0D ; pop {r4, r6, r7, lr} +; UNWIND-NEXT: 0x41 ; vsp = vsp - 8 +; UNWIND-NEXT: 0x80 0x0D ; pop {r4, r6, r7} +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll index cae38b5e4a5a1b6..c0c32de509b75d2 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll @@ -19,17 +19,14 @@ define i32 @test_non_leaf(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %x) "s ; CHECK-LABEL: test_non_leaf: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: pac r12, lr, sp -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! 
+; CHECK-NEXT: .save {r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r7, r12, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: bl otherfn ; CHECK-NEXT: ldr r0, [sp, #16] ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r7, lr} +; CHECK-NEXT: pop.w {r7, r12, lr} ; CHECK-NEXT: aut r12, lr, sp ; CHECK-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll index d027c9e8c7b548f..2b7abfabf7035ab 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-none-eabi" @@ -5,6 +6,50 @@ target triple = "thumbv8.1m.main-arm-none-eabi" %"struct.std::__va_list" = type { ptr } define hidden i32 @_Z1fiz(i32 %n, ...) 
local_unnamed_addr #0 { +; CHECK-LABEL: _Z1fiz: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .save {r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r7, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset lr, -16 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .cfi_offset r7, -24 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 28 +; CHECK-NEXT: add.w r12, sp, #16 +; CHECK-NEXT: cmp r0, #1 +; CHECK-NEXT: stm.w r12, {r1, r2, r3} +; CHECK-NEXT: add r1, sp, #16 +; CHECK-NEXT: str r1, [sp] +; CHECK-NEXT: blt .LBB0_3 +; CHECK-NEXT: @ %bb.1: @ %for.body.lr.ph +; CHECK-NEXT: ldr r1, [sp] +; CHECK-NEXT: dls lr, r0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: adds r1, #4 +; CHECK-NEXT: .LBB0_2: @ %for.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: str r1, [sp] +; CHECK-NEXT: ldr r2, [r1, #-4] +; CHECK-NEXT: adds r1, #4 +; CHECK-NEXT: add r0, r2 +; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: b .LBB0_4 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: .LBB0_4: @ %for.cond.cleanup +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r7, r12, lr} +; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %ap = alloca %"struct.std::__va_list", align 4 call void @llvm.va_start(ptr nonnull %ap) @@ -33,34 +78,6 @@ for.body: ; preds = %for.body.lr.ph, %fo br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } -; CHECK-LABEL: _Z1fiz: -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .pad #12 -; CHECK-NEXT: sub sp, #12 -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset lr, -16 -; CHECK-NEXT: .cfi_offset r7, -20 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, 
[sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: .cfi_offset ra_auth_code, -24 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .cfi_def_cfa_offset 28 -; ... -; CHECK: add.w r[[N:[0-9]*]], sp, #16 -; CHECK: stm.w r[[N]], {r1, r2, r3} -; ... -; CHECK: add sp, #4 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r7, lr} -; CHECK-NEXT: add sp, #12 -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr - declare void @llvm.va_start(ptr) #1 declare void @llvm.va_end(ptr) #1 diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll index 8019cd5b6109eb3..03b769f256bc28f 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s ; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -19,6 +20,50 @@ target triple = "thumbv8.1m.main-arm-none-eabi" %"struct.std::__va_list" = type { ptr } define hidden i32 @_Z1fiz(i32 %n, ...) 
local_unnamed_addr #0 { +; CHECK-LABEL: _Z1fiz: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .save {r4, r5, r7, ra_auth_code, lr} +; CHECK-NEXT: push.w {r4, r5, r7, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, -16 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .cfi_offset r7, -24 +; CHECK-NEXT: .cfi_offset r5, -28 +; CHECK-NEXT: .cfi_offset r4, -32 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: add r0, sp, #28 +; CHECK-NEXT: movs r5, #0 +; CHECK-NEXT: cmp r4, #1 +; CHECK-NEXT: stm r0!, {r1, r2, r3} +; CHECK-NEXT: add r0, sp, #28 +; CHECK-NEXT: str r0, [sp, #4] +; CHECK-NEXT: blt .LBB0_2 +; CHECK-NEXT: .LBB0_1: @ %for.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr r0, [sp, #4] +; CHECK-NEXT: adds r1, r0, #4 +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: ldr r0, [r0] +; CHECK-NEXT: bl _Z1gi +; CHECK-NEXT: add r5, r0 +; CHECK-NEXT: subs r4, #1 +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop.w {r4, r5, r7, r12, lr} +; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %ap = alloca %"struct.std::__va_list", align 4 call void @llvm.va_start(ptr nonnull %ap) @@ -47,36 +92,6 @@ for.body: ; preds = %for.body.lr.ph, %fo br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } -; CHECK-LABEL: _Z1fiz: -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .pad #12 -; CHECK-NEXT: sub sp, #12 -; CHECK-NEXT: .cfi_def_cfa_offset 12 -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 28 -; CHECK-NEXT: .cfi_offset lr, -16 -; CHECK-NEXT: .cfi_offset r7, -20 -; CHECK-NEXT: 
.cfi_offset r5, -24 -; CHECK-NEXT: .cfi_offset r4, -28 -; CHECK-NEXT: .save {ra_auth_code} -; CHECK-NEXT: str r12, [sp, #-4]! -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset ra_auth_code, -32 -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: .cfi_def_cfa_offset 40 -; ... -; CHECK: add r[[N:[0-9]*]], sp, #28 -; CHECK: stm r[[N]]!, {r1, r2, r3} -; ... -; CHECK: add sp, #8 -; CHECK-NEXT: ldr r12, [sp], #4 -; CHECK-NEXT: pop.w {r4, r5, r7, lr} -; CHECK-NEXT: add sp, #12 -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr - declare void @llvm.va_start(ptr) #1 declare void @llvm.va_end(ptr) #1 @@ -92,7 +107,9 @@ attributes #1 = { nounwind "sign-return-address"="non-leaf"} !2 = !{i32 8, !"sign-return-address-all", i32 0} ; UNWIND-LABEL: FunctionAddress -; UNWIND: 0x01 ; vsp = vsp + 8 -; UNWIND-NEXT: 0xB4 ; pop ra_auth_code -; UNWIND-NEXT: 0x84 0x0B ; pop {r4, r5, r7, lr} -; UNWIND-NEXT: 0x02 ; vsp = vsp + 12 +; UNWIND: 0x01 ; vsp = vsp + 8 +; UNWIND-NEXT: 0x80 0x0B ; pop {r4, r5, r7} +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} +; UNWIND-NEXT: 0x02 ; vsp = vsp + 12 + diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll index c1d17a7587be058..5eb5990be7c1183 100644 --- a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-none-eabi" @@ -14,6 +15,89 @@ target triple = "thumbv8.1m.main-arm-none-eabi" ; } define hidden i32 @f(i32 %n) local_unnamed_addr #0 { +; CHECK-LABEL: f: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, 
ra_auth_code, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r12, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; CHECK-NEXT: .cfi_offset r9, -12 +; CHECK-NEXT: .cfi_offset r8, -16 +; CHECK-NEXT: .cfi_offset r7, -20 +; CHECK-NEXT: .cfi_offset r6, -24 +; CHECK-NEXT: .cfi_offset r5, -28 +; CHECK-NEXT: .cfi_offset r4, -32 +; CHECK-NEXT: .setfp r7, sp, #12 +; CHECK-NEXT: add r7, sp, #12 +; CHECK-NEXT: .cfi_def_cfa r7, 20 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: movs r0, #7 +; CHECK-NEXT: add.w r0, r0, r5, lsl #2 +; CHECK-NEXT: bic r0, r0, #7 +; CHECK-NEXT: sub.w r4, sp, r0 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl g +; CHECK-NEXT: cmp r5, #1 +; CHECK-NEXT: blt .LBB0_3 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: subs r0, r5, #1 +; CHECK-NEXT: and r12, r5, #3 +; CHECK-NEXT: cmp r0, #3 +; CHECK-NEXT: bhs .LBB0_4 +; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b .LBB0_6 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: b .LBB0_9 +; CHECK-NEXT: .LBB0_4: @ %for.body.preheader.new +; CHECK-NEXT: bic r0, r5, #3 +; CHECK-NEXT: movs r2, #1 +; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: sub.w r3, r4, #16 +; CHECK-NEXT: add.w lr, r2, r0, lsr #2 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: .LBB0_5: @ %for.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldr r5, [r3, #16]! 
+; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: add r0, r5 +; CHECK-NEXT: ldrd r5, r1, [r3, #4] +; CHECK-NEXT: ldr r6, [r3, #12] +; CHECK-NEXT: add r0, r5 +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: add r0, r6 +; CHECK-NEXT: le lr, .LBB0_5 +; CHECK-NEXT: .LBB0_6: @ %for.cond.cleanup.loopexit.unr-lcssa +; CHECK-NEXT: cmp.w r12, #0 +; CHECK-NEXT: beq .LBB0_9 +; CHECK-NEXT: @ %bb.7: @ %for.body.epil +; CHECK-NEXT: ldr.w r3, [r4, r2, lsl #2] +; CHECK-NEXT: cmp.w r12, #1 +; CHECK-NEXT: add r0, r3 +; CHECK-NEXT: beq .LBB0_9 +; CHECK-NEXT: @ %bb.8: @ %for.body.epil.1 +; CHECK-NEXT: add.w r2, r4, r2, lsl #2 +; CHECK-NEXT: cmp.w r12, #2 +; CHECK-NEXT: ldr r1, [r2, #4] +; CHECK-NEXT: add r0, r1 +; CHECK-NEXT: itt ne +; CHECK-NEXT: ldrne r1, [r2, #8] +; CHECK-NEXT: addne r0, r1 +; CHECK-NEXT: .LBB0_9: @ %for.cond.cleanup +; CHECK-NEXT: sub.w r4, r7, #12 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r12, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr entry: %vla = alloca i32, i32 %n, align 4 %call = call i32 @g(i32 %n, ptr nonnull %vla) #0 @@ -88,32 +172,6 @@ for.body.epil.2: ; preds = %for.body.epil.1 br label %for.cond.cleanup } -; CHECK-LABEL: f: -; CHECK: pac r12, lr, sp -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .cfi_def_cfa_offset 20 -; CHECK-NEXT: .cfi_offset lr, -4 -; CHECK-NEXT: .cfi_offset r7, -8 -; CHECK-NEXT: .cfi_offset r6, -12 -; CHECK-NEXT: .cfi_offset r5, -16 -; CHECK-NEXT: .cfi_offset r4, -20 -; CHECK-NEXT: .setfp r7, sp, #12 -; CHECK-NEXT: add r7, sp, #12 -; CHECK-NEXT: .cfi_def_cfa r7, 8 -; CHECK-NEXT: .save {r8, r9, ra_auth_code} -; CHECK-NEXT: push.w {r8, r9, r12} -; CHECK-NEXT: .cfi_offset ra_auth_code, -24 -; CHECK-NEXT: .cfi_offset r9, -28 -; CHECK-NEXT: .cfi_offset r8, -32 -; ... 
-; CHECK: sub.w r[[N:[0-9]*]], r7, #24 -; CHECK-NEXT: mov sp, r[[N]] -; CHECK-NEXT: pop.w {r8, r9, r12} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} -; CHECK-NEXT: aut r12, lr, sp -; CHECK-NEXT: bx lr - declare dso_local i32 @g(i32, ptr) local_unnamed_addr #0 attributes #0 = { nounwind "sign-return-address"="non-leaf"} From 933a56674e75ed372e000758378b7981e5b4d387 Mon Sep 17 00:00:00 2001 From: Jack Styles Date: Mon, 28 Oct 2024 09:08:48 +0000 Subject: [PATCH 145/425] [PAuthLR] Add Missing Break Statement for MachineOperand Switch Statement (#113883) There was a missing break, which led to an unannotated fallthrough when merging #112171. This has caused sanitizer builds to fail. This adds the missing break in the switch statement to ensure that the fallthrough does not occur. --- llvm/lib/CodeGen/MachineOperand.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 620779e2dec7086..d9e5e9d9d1e41f0 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -772,6 +772,7 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI, OS << "negate_ra_sign_state_with_pc "; if (MCSymbol *Label = CFI.getLabel()) MachineOperand::printSymbol(OS, *Label); + break; default: // TODO: Print the other CFI Operations. OS << ""; From 43a5719d9f54fb482d523a6e313d9b9b9af82379 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Mon, 28 Oct 2024 09:22:09 +0000 Subject: [PATCH 146/425] [RISCV] Use Sha extension in RVA23S64 profile (#113823) In the ratified version of the RVA23S64 definition, the Sha extension is now used to group together the set of hypervisor related extensions. 
--- llvm/lib/Target/RISCV/RISCVProfiles.td | 9 +-------- llvm/test/CodeGen/RISCV/attributes.ll | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVProfiles.td b/llvm/lib/Target/RISCV/RISCVProfiles.td index ce7d1973989fc13..78c076fdb0b26ed 100644 --- a/llvm/lib/Target/RISCV/RISCVProfiles.td +++ b/llvm/lib/Target/RISCV/RISCVProfiles.td @@ -82,14 +82,7 @@ defvar RVA23S64BaseFeatures = !listconcat(RVA22S64BaseFeatures, FeatureStdExtSscofpmf, FeatureStdExtSsnpm, FeatureStdExtSsu64xl, - FeatureStdExtH, - FeatureStdExtSsstateen, - FeatureStdExtShcounterenw, - FeatureStdExtShvstvala, - FeatureStdExtShtvala, - FeatureStdExtShvstvecd, - FeatureStdExtShvsatpa, - FeatureStdExtShgatpa]); + FeatureStdExtSha]); defvar RVA23S64Features = !listconcat(RVA23U64Features, RVA23S64BaseFeatures); diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 4cc9ef2ea0d7ff8..5c835befd6f5050 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -583,7 +583,7 @@ ; RVA22U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" ; RVA22S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscounterenw1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0_svinval1p0_svpbmt1p0" ; RVA23U64: .attribute 5, 
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_supm1p0" -; RVA23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" +; RVA23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_sha1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" ; RVB23U64: .attribute 5, 
"rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0" ; RVB23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0" ; RVM23U32: .attribute 5, "rv32i2p1_m2p0_zicbop1p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zimop1p0_zmmul1p0_zca1p0_zcb1p0_zce1p0_zcmop1p0_zcmp1p0_zcmt1p0_zba1p0_zbb1p0_zbs1p0" From 96f5c683500eb2d7f7c3984e3a056315c50c4662 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Oct 2024 09:37:06 +0000 Subject: [PATCH 147/425] [RISCV] Lower @llvm.experimental.vector.compress for zvfhmin/zvfbfmin (#113770) This is a follow up to #113291 and handles f16/bf16 with zvfhmin and zvfbfmin. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +- .../RISCV/rvv/fixed-vectors-compress-fp.ll | 90 +++++++++++- .../test/CodeGen/RISCV/rvv/vector-compress.ll | 134 +++++++++++++++++- 3 files changed, 224 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 69112d868bff827..af7a39b2580a372 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1081,7 +1081,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE, - ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, + ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE, + ISD::VECTOR_COMPRESS}, VT, Custom); MVT EltVT = VT.getVectorElementType(); if (isTypeLegal(EltVT)) @@ -1333,7 +1334,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UNDEF, VT, Custom); setOperationAction({ISD::CONCAT_VECTORS, ISD::VECTOR_REVERSE, - ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, + ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR, + ISD::VECTOR_COMPRESS}, VT, Custom); // FIXME: mload, mstore, mgather, mscatter, vp_load/store, @@ -1440,8 +1442,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT}, VT, Custom); - - setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom); } // Custom-legalize bitcasts from fixed-length vectors to scalar types. 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll index 8f1ff7ed4a11e23..3069d6011075703 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll @@ -1,6 +1,92 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s + +define <1 x bfloat> @vector_compress_v1bf16(<1 x bfloat> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> undef) + ret <1 x bfloat> %ret +} + +define <1 x bfloat> @vector_compress_v1bf16_passthru(<1 x bfloat> %passthru, <1 x bfloat> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> %passthru) + ret <1 x bfloat> %ret +} + +define <2 x bfloat> @vector_compress_v2bf16(<2 x bfloat> %v, <2 x i1> 
%mask) { +; CHECK-LABEL: vector_compress_v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <2 x bfloat> @llvm.experimental.vector.compress.v2bf16(<2 x bfloat> %v, <2 x i1> %mask, <2 x bfloat> undef) + ret <2 x bfloat> %ret +} + +define <2 x bfloat> @vector_compress_v2bf16_passthru(<2 x bfloat> %passthru, <2 x bfloat> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <2 x bfloat> @llvm.experimental.vector.compress.v2bf16(<2 x bfloat> %v, <2 x i1> %mask, <2 x bfloat> %passthru) + ret <2 x bfloat> %ret +} + +define <4 x bfloat> @vector_compress_v4bf16(<4 x bfloat> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> undef) + ret <4 x bfloat> %ret +} + +define <4 x bfloat> @vector_compress_v4bf16_passthru(<4 x bfloat> %passthru, <4 x bfloat> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> %passthru) + ret <4 x bfloat> %ret +} + +define <8 x bfloat> @vector_compress_v8bf16(<8 x bfloat> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call <8 x bfloat> 
@llvm.experimental.vector.compress.v8bf16(<8 x bfloat> %v, <8 x i1> %mask, <8 x bfloat> undef) + ret <8 x bfloat> %ret +} + +define <8 x bfloat> @vector_compress_v8bf16_passthru(<8 x bfloat> %passthru, <8 x bfloat> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <8 x bfloat> @llvm.experimental.vector.compress.v8bf16(<8 x bfloat> %v, <8 x i1> %mask, <8 x bfloat> %passthru) + ret <8 x bfloat> %ret +} define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) { ; CHECK-LABEL: vector_compress_v1f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll b/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll index 85d72ad2fe9cb4d..7516a72a92bc8bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh %s -o - | FileCheck %s -; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s ; Vector compress for i8 type @@ -472,6 +474,134 @@ define @vector_compress_nxv8i64_passthru( % ret %ret } +; Vector compress for bf16 type + +define @vector_compress_nxv1bf16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; 
CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1bf16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv1bf16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1bf16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv2bf16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2bf16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv2bf16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2bf16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv4bf16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4bf16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv4bf16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4bf16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv8bf16( %data, %mask) { +; CHECK-LABEL: 
vector_compress_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8bf16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv8bf16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8bf16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv16bf16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16bf16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv16bf16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16bf16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv32bf16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv32bf16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv32bf16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv32bf16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v16, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv32bf16( %data, 
%mask, %passthru) + ret %ret +} + ; Vector compress for f16 type define @vector_compress_nxv1f16( %data, %mask) { From fa4790e404843cff4370cf259b93e51e8341469d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20Brku=C5=A1anin?= Date: Mon, 28 Oct 2024 10:43:18 +0100 Subject: [PATCH 148/425] [AMDGPU][MC] Fix disassembler for VIMAGE when non-first vaddr is v0 (#113569) For disassembler tables we use *V1_V4* variants for VIMAGE and then remove unused vaddr fields. *V1_V1* variant, which has every vaddr field other than vaddr0 set to 0, was also enabled and caused confusion when decoding cases which used v0 (whose encoded value is 0) --- llvm/lib/Target/AMDGPU/MIMGInstructions.td | 10 ++++------ llvm/test/MC/AMDGPU/gfx12_asm_vimage.s | 12 ++++++++++++ .../MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt | 12 ++++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index e0d1cde28452458..2f342365c3a5af8 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -567,8 +567,7 @@ multiclass MIMG_NoSampler_Src_Helper ; } else { - def _V1_gfx12 : VIMAGE_NoSampler_gfx12; + def _V1_gfx12 : VIMAGE_NoSampler_gfx12; } } } @@ -789,8 +788,7 @@ multiclass MIMG_Store_Addr_Helper ; + def _V1_gfx12 : VIMAGE_Store_gfx12 ; } } let VAddrDwords = 2 in { @@ -1017,9 +1015,9 @@ multiclass MIMG_Atomic_Addr_Helper_m ; + def _V1_gfx12 : VIMAGE_Atomic_gfx12 ; else - def _V1_gfx12 : VIMAGE_Atomic_gfx12_Renamed ; + def _V1_gfx12 : VIMAGE_Atomic_gfx12_Renamed ; } } let VAddrDwords = 2 in { diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s index 39010883a3c0b83..196d75db4260528 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s @@ -155,6 +155,9 @@ image_load v[0:2], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY th:TH_LO image_load v[0:2], [v4, v5], s[8:15] 
dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY th:TH_LOAD_HT scope:SCOPE_SE r128 a16 tfe d16 // GFX12: encoding: [0x75,0x00,0xc0,0xd3,0x00,0x10,0xa4,0x00,0x04,0x05,0x00,0x00] +image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D +// GFX12: encoding: [0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00] + image_load_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] @@ -402,6 +405,9 @@ image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_NT_WB image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_BYPASS scope:SCOPE_SYS // GFX12: encoding: [0x00,0x80,0x41,0xd0,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00] +image_store v[1:4], [v2, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D +// GFX12: encoding: [0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] + image_store_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] @@ -559,6 +565,9 @@ image_atomic_swap v[3:4], [v4, v5], s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA a1 image_atomic_swap v[254:255], [v4, v5], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16 // GFX12: encoding: [0x47,0x80,0xc2,0xd0,0xfe,0xc0,0x00,0x00,0x04,0x05,0x00,0x00] +image_atomic_swap v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D +// GFX12: encoding: [0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] + image_atomic_cmpswap v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] @@ -613,6 +622,9 @@ image_atomic_add_uint v[254:255], [v4, v5, v6, v7], s[96:103] dmask:0x3 dim:SQ_R image_atomic_add_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT // GFX12: encoding: [0x00,0x00,0x43,0xd0,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00] +image_atomic_add_uint v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D +// GFX12: encoding: 
[0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] + image_atomic_sub_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt index aa49caacb4fccdc..08e9bef8cf67851 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt @@ -157,6 +157,9 @@ # GFX12: image_load v[0:2], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY th:TH_LOAD_HT scope:SCOPE_SE r128 a16 tfe d16 ; encoding: [0x75,0x00,0xc0,0xd3,0x00,0x10,0xa4,0x00,0x04,0x05,0x00,0x00] 0x75,0x00,0xc0,0xd3,0x00,0x10,0xa4,0x00,0x04,0x05,0x00,0x00 +# GFX12: image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00] +0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00 + # GFX12: image_load_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] 0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00 @@ -403,6 +406,9 @@ # GFX12: image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x41,0xd0,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00] 0x00,0x80,0x41,0xd0,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00 +# GFX12: image_store v[1:4], [v2, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 + # GFX12: image_store_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] 0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00 @@ -559,6 +565,9 @@ # GFX12: image_atomic_swap v[254:255], [v4, v5], s[96:103] dmask:0x3 
dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16 ; encoding: [0x47,0x80,0xc2,0xd0,0xfe,0xc0,0x00,0x00,0x04,0x05,0x00,0x00] 0x47,0x80,0xc2,0xd0,0xfe,0xc0,0x00,0x00,0x04,0x05,0x00,0x00 +# GFX12: image_atomic_swap v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 + # GFX12: image_atomic_cmpswap v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 @@ -613,6 +622,9 @@ # GFX12: image_atomic_add_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT ; encoding: [0x00,0x00,0x43,0xd0,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00] 0x00,0x00,0x43,0xd0,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00 +# GFX12: image_atomic_add_uint v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 + # GFX12: image_atomic_sub_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 From f78610af3feb88f0e1edb2482dc77490fb4cad77 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 28 Oct 2024 17:45:08 +0800 Subject: [PATCH 149/425] [InstCombine] Add function attribute `instcombine-no-verify-fixpoint` (#113822) This patch introduces a function attribute `instcombine-no-verify-fixpoint` to avoid disabling fix-point verification for unrelated tests in the same file. Address comment https://github.com/llvm/llvm-project/pull/112642#discussion_r1804714387. 
--- .../Transforms/InstCombine/InstructionCombining.cpp | 12 ++++++++---- .../ValueTracking/numsignbits-from-assume.ll | 5 +---- .../Transforms/InstCombine/2007-10-31-RangeCrash.ll | 7 ++++--- llvm/test/Transforms/InstCombine/cast_phi.ll | 4 ++-- .../InstCombine/constant-fold-iteration.ll | 5 +++-- llvm/test/Transforms/InstCombine/icmp-or.ll | 6 +++--- llvm/test/Transforms/InstCombine/pr55228.ll | 4 ++-- llvm/test/Transforms/InstCombine/shift.ll | 10 +++++----- llvm/test/Transforms/InstCombine/sink_instruction.ll | 11 +++++------ llvm/test/Transforms/InstCombine/zext-or-icmp.ll | 4 ++-- llvm/test/Transforms/PGOProfile/chr.ll | 10 +++++----- .../SimpleLoopUnswitch/2007-08-01-LCSSA.ll | 6 +++--- 12 files changed, 43 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 73a4705531781ab..2a54390c0f1882d 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5464,6 +5464,8 @@ static bool combineInstructionsOverFunction( BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI, const InstCombineOptions &Opts) { auto &DL = F.getDataLayout(); + bool VerifyFixpoint = Opts.VerifyFixpoint && + !F.hasFnAttribute("instcombine-no-verify-fixpoint"); /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. 
@@ -5488,7 +5490,7 @@ static bool combineInstructionsOverFunction( while (true) { ++Iteration; - if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) { + if (Iteration > Opts.MaxIterations && !VerifyFixpoint) { LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations << " on " << F.getName() << " reached; stopping without verifying fixpoint\n"); @@ -5510,9 +5512,11 @@ static bool combineInstructionsOverFunction( MadeIRChange = true; if (Iteration > Opts.MaxIterations) { report_fatal_error( - "Instruction Combining did not reach a fixpoint after " + - Twine(Opts.MaxIterations) + " iterations. " + - "Use 'instcombine' to suppress this error.", + "Instruction Combining on " + Twine(F.getName()) + + " did not reach a fixpoint after " + Twine(Opts.MaxIterations) + + " iterations. " + + "Use 'instcombine' or function attribute " + "'instcombine-no-verify-fixpoint' to suppress this error.", /*GenCrashDiag=*/false); } } diff --git a/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll b/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll index 95ac98532da6214..5beb0c7cadfbaa2 100644 --- a/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll +++ b/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll @@ -1,8 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='instcombine' -S | FileCheck %s - -; FIXME: This does not currently reach a fix point, because an assume can only -; be propagated backwards after its argument has been simplified. 
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i32 @computeNumSignBits_add1(i32 %in) { ; CHECK-LABEL: @computeNumSignBits_add1( diff --git a/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll b/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll index 8b472aa5af09024..b5ae08e1daa3afa 100644 --- a/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll +++ b/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll @@ -1,13 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -S -passes='instcombine' | FileCheck %s +; RUN: opt < %s -S -passes=instcombine | FileCheck %s ; We do not reach a fixpoint, because we first have to infer nsw on the IV add, ; and could eliminate the icmp slt afterwards, but don't revisit it. target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" -define i32 @test() { -; CHECK-LABEL: define i32 @test() { +define i32 @test() "instcombine-no-verify-fixpoint" { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 true, label [[BB_I:%.*]], label [[CALCULATECOLORSPECIFICBLACKLEVEL_EXIT:%.*]] ; CHECK: bb.i: diff --git a/llvm/test/Transforms/InstCombine/cast_phi.ll b/llvm/test/Transforms/InstCombine/cast_phi.ll index 2819b7d05f7b309..6b05edc31deb87a 100644 --- a/llvm/test/Transforms/InstCombine/cast_phi.ll +++ b/llvm/test/Transforms/InstCombine/cast_phi.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes="instcombine" -S | FileCheck %s +; RUN: opt < %s -passes=instcombine -S | FileCheck %s target datalayout = "n32:64" @@ -309,7 +309,7 @@ exit: ret i64 %r } -define i8 @trunc_in_loop_exit_block() { +define i8 @trunc_in_loop_exit_block() "instcombine-no-verify-fixpoint" { ; CHECK-LABEL: @trunc_in_loop_exit_block( ; CHECK-NEXT: entry: ; CHECK-NEXT: 
br label [[LOOP:%.*]] diff --git a/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll b/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll index 7f800f614c47d7c..ed4fcc6ecaac723 100644 --- a/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll +++ b/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll @@ -1,11 +1,12 @@ -; RUN: opt < %s -passes='instcombine' -S -debug 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=instcombine -S -debug 2>&1 | FileCheck %s ; REQUIRES: asserts target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" ; This test disables fixpoint verification, because that would cause a second ; iteration for verification. -define i32 @a() nounwind readnone { +define i32 @a() nounwind readnone "instcombine-no-verify-fixpoint" { entry: %cmp = icmp eq i32 0, ptrtoint (ptr @a to i32) %ext = zext i1 %cmp to i32 diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll index 36b3216196f8467..56115f6d7d34144 100644 --- a/llvm/test/Transforms/InstCombine/icmp-or.ll +++ b/llvm/test/Transforms/InstCombine/icmp-or.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='instcombine' -S | FileCheck %s +; RUN: opt < %s -passes=instcombine -S | FileCheck %s declare void @use(i8) @@ -432,7 +432,7 @@ define i1 @icmp_or_xor_2_ne_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) { ; simplify `%cmp_1 = icmp eq i64 %xor, 0`, `%xor = xor i64 %x1, %y1` ; has one use which allows for complete simplification (rooted on ; `%or1 = or i1 %cmp, %cmp_1` so we don't end up adding it back). 
-define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) { +define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) "instcombine-no-verify-fixpoint" { ; CHECK-LABEL: @icmp_or_xor_2_3_fail( ; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[X1:%.*]], [[Y1:%.*]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]] @@ -453,7 +453,7 @@ define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) { ; negative test - xor multiuse -define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) { +define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) "instcombine-no-verify-fixpoint" { ; CHECK-LABEL: @icmp_or_xor_2_4_fail( ; CHECK-NEXT: [[XOR:%.*]] = xor i64 [[X1:%.*]], [[Y1:%.*]] ; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]] diff --git a/llvm/test/Transforms/InstCombine/pr55228.ll b/llvm/test/Transforms/InstCombine/pr55228.ll index 5e34c074346e3c6..c959bf16bcb5d52 100644 --- a/llvm/test/Transforms/InstCombine/pr55228.ll +++ b/llvm/test/Transforms/InstCombine/pr55228.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes='instcombine' < %s | FileCheck %s +; RUN: opt -S -passes=instcombine < %s | FileCheck %s ; This does not reach a fixpoint, because the global initializer is not in ; folded form. This will not happen if preceded by a GlobalOpt run. 
@@ -9,7 +9,7 @@ target datalayout = "p:8:8" @g = external global i8 @c = constant ptr getelementptr inbounds (i8, ptr @g, i64 1) -define i1 @test(ptr %p) { +define i1 @test(ptr %p) "instcombine-no-verify-fixpoint" { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P:%.*]], getelementptr inbounds (i8, ptr @g, i64 1) ; CHECK-NEXT: ret i1 [[CMP]] diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll index 69f531e98f045b6..986e1073c638913 100644 --- a/llvm/test/Transforms/InstCombine/shift.ll +++ b/llvm/test/Transforms/InstCombine/shift.ll @@ -1,8 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='instcombine' -S | FileCheck %s - -; The fuzzer-generated @ashr_out_of_range test case does not reach a fixpoint, -; because a logical and it not relaxed to a bitwise and in one iteration. +; RUN: opt < %s -passes=instcombine -S | FileCheck %s declare void @use(i64) declare void @use_i32(i32) @@ -1735,9 +1732,12 @@ define i177 @lshr_out_of_range2(i177 %Y, ptr %A2, ptr %ptr) { ret i177 %B1 } +; The fuzzer-generated @ashr_out_of_range test case does not reach a fixpoint, +; because a logical and it not relaxed to a bitwise and in one iteration. 
+ ; OSS Fuzz #5032 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5032 -define void @ashr_out_of_range(ptr %A) { +define void @ashr_out_of_range(ptr %A) "instcombine-no-verify-fixpoint" { ; CHECK-LABEL: @ashr_out_of_range( ; CHECK-NEXT: [[L:%.*]] = load i177, ptr [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i177 [[L]], -1 diff --git a/llvm/test/Transforms/InstCombine/sink_instruction.ll b/llvm/test/Transforms/InstCombine/sink_instruction.ll index dac40852c4bdcbf..cb9a3069ca5fd4b 100644 --- a/llvm/test/Transforms/InstCombine/sink_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_instruction.ll @@ -1,9 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='instcombine' -S < %s | FileCheck %s - -; We fail to reach a fixpoint, because sunk instructions get revisited too -; early. In @test2 the sunk add is revisited before the dominating condition -; is visited and added to the DomConditionCache. +; RUN: opt -passes=instcombine -S < %s | FileCheck %s ;; This tests that the instructions in the entry blocks are sunk into each ;; arm of the 'if'. @@ -31,9 +27,12 @@ endif: ; preds = %entry ret i32 %tmp.2 } +; We fail to reach a fixpoint, because sunk instructions get revisited too +; early. In @test2 the sunk add is revisited before the dominating condition +; is visited and added to the DomConditionCache. ;; PHI use, sink divide before call. 
-define i32 @test2(i32 %x) nounwind ssp { +define i32 @test2(i32 %x) nounwind ssp "instcombine-no-verify-fixpoint" { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[BB:%.*]] diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll index acf547b55722fc9..feb4be9e370505c 100644 --- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll +++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='instcombine' -S | FileCheck %s +; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) { ; CHECK-LABEL: @zext_or_icmp_icmp( @@ -168,7 +168,7 @@ define i32 @PR49475(i32 %x, i16 %y) { ; This would infinite-loop. -define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) { +define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) "instcombine-no-verify-fixpoint" { ; CHECK-LABEL: @PR49475_infloop( ; CHECK-NEXT: [[B2:%.*]] = icmp eq i16 [[INSERT:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[T0:%.*]], 1 diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll index 34e39fe37979ac1..46f9a2bde7a2352 100644 --- a/llvm/test/Transforms/PGOProfile/chr.ll +++ b/llvm/test/Transforms/PGOProfile/chr.ll @@ -1,8 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes='require,function(chr,instcombine,simplifycfg)' -S | FileCheck %s - -; FIXME: This does not currently reach a fix point, because we don't make use -; of a freeze that is pushed up the instruction chain later. 
+; RUN: opt < %s -passes='require,function(chr,instcombine,simplifycfg)' -S | FileCheck %s declare void @foo() declare void @bar() @@ -1910,6 +1907,9 @@ bb4: ret i32 %v13 } +; FIXME: This does not currently reach a fix point, because we don't make use +; of a freeze that is pushed up the instruction chain later. + ; Test the case where two scopes share a common instruction to hoist (%cmp.i). ; Two scopes would hoist it to their hoist points, but since the outer scope ; hoists (entry/bb6-9) it first to its hoist point, it'd be wrong (causing bad @@ -1928,7 +1928,7 @@ bb4: ; foo(); ; } ; return 45; -define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 { +define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) "instcombine-no-verify-fixpoint" !prof !14 { ; CHECK-LABEL: @test_chr_21( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[I_FR:%.*]] = freeze i64 [[I:%.*]] diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll index 9e4b33d4c4d5e55..fb342322b2da7af 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt < %s -S -passes='loop(simple-loop-unswitch),instcombine' -verify-memoryssa | FileCheck %s +; RUN: opt < %s -S -passes='loop(simple-loop-unswitch),instcombine' -verify-memoryssa | FileCheck %s ; We do not reach a fixpoint, because we first have to infer nsw on the IV add, ; and could eliminate the icmp slt afterwards, but don't revisit it. 
@@ -8,9 +8,9 @@ declare i32 @strcmp(ptr, ptr) -define i32 @_ZN9Generator6strregEPKc(ptr %this, ptr %s) { +define i32 @_ZN9Generator6strregEPKc(ptr %this, ptr %s) "instcombine-no-verify-fixpoint" { ; CHECK-LABEL: define i32 @_ZN9Generator6strregEPKc( -; CHECK-SAME: ptr [[THIS:%.*]], ptr [[S:%.*]]) { +; CHECK-SAME: ptr [[THIS:%.*]], ptr [[S:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP122:%.*]] = icmp eq ptr [[S]], null ; CHECK-NEXT: br label [[BB184:%.*]] From 09c258ef6a2fcca2161488b214d53ef39891fa22 Mon Sep 17 00:00:00 2001 From: Adrian Vogelsgesang Date: Mon, 28 Oct 2024 11:01:57 +0100 Subject: [PATCH 150/425] [NFC][lldb-dap] Clean-up includes (#113839) This commit cleans up the includes in the `lldb-dap` subfolder. The main motivation was that I got annoyed by `clangd` always complaining about unused includes while working on lldb-dap. --- lldb/tools/lldb-dap/Breakpoint.cpp | 1 + lldb/tools/lldb-dap/Breakpoint.h | 1 + lldb/tools/lldb-dap/BreakpointBase.cpp | 3 +-- lldb/tools/lldb-dap/BreakpointBase.h | 2 -- lldb/tools/lldb-dap/DAP.cpp | 5 ++++- lldb/tools/lldb-dap/DAP.h | 19 ------------------- lldb/tools/lldb-dap/FifoFiles.cpp | 6 +----- lldb/tools/lldb-dap/FifoFiles.h | 4 +--- lldb/tools/lldb-dap/FunctionBreakpoint.cpp | 1 + lldb/tools/lldb-dap/IOStream.cpp | 1 - lldb/tools/lldb-dap/IOStream.h | 2 -- lldb/tools/lldb-dap/InstructionBreakpoint.cpp | 1 + lldb/tools/lldb-dap/InstructionBreakpoint.h | 1 - lldb/tools/lldb-dap/JSONUtils.cpp | 6 +----- lldb/tools/lldb-dap/LLDBUtils.cpp | 2 ++ lldb/tools/lldb-dap/OutputRedirector.h | 2 -- lldb/tools/lldb-dap/RunInTerminal.cpp | 5 +---- lldb/tools/lldb-dap/RunInTerminal.h | 4 +++- lldb/tools/lldb-dap/SourceBreakpoint.cpp | 1 + lldb/tools/lldb-dap/lldb-dap.cpp | 10 +++++----- 20 files changed, 24 insertions(+), 53 deletions(-) diff --git a/lldb/tools/lldb-dap/Breakpoint.cpp b/lldb/tools/lldb-dap/Breakpoint.cpp index 0c33d4b114d7602..9ea7a42ca85a1ef 100644 --- a/lldb/tools/lldb-dap/Breakpoint.cpp +++ 
b/lldb/tools/lldb-dap/Breakpoint.cpp @@ -9,6 +9,7 @@ #include "Breakpoint.h" #include "DAP.h" #include "JSONUtils.h" +#include "lldb/API/SBBreakpointLocation.h" #include "llvm/ADT/StringExtras.h" using namespace lldb_dap; diff --git a/lldb/tools/lldb-dap/Breakpoint.h b/lldb/tools/lldb-dap/Breakpoint.h index 47a9d9c59ae2b76..ee9d3736d6190fb 100644 --- a/lldb/tools/lldb-dap/Breakpoint.h +++ b/lldb/tools/lldb-dap/Breakpoint.h @@ -10,6 +10,7 @@ #define LLDB_TOOLS_LLDB_DAP_BREAKPOINT_H #include "BreakpointBase.h" +#include "lldb/API/SBBreakpoint.h" namespace lldb_dap { diff --git a/lldb/tools/lldb-dap/BreakpointBase.cpp b/lldb/tools/lldb-dap/BreakpointBase.cpp index 519729f5519ffcd..f3cb06a3562d485 100644 --- a/lldb/tools/lldb-dap/BreakpointBase.cpp +++ b/lldb/tools/lldb-dap/BreakpointBase.cpp @@ -7,8 +7,7 @@ //===----------------------------------------------------------------------===// #include "BreakpointBase.h" -#include "DAP.h" -#include "llvm/ADT/StringExtras.h" +#include "JSONUtils.h" using namespace lldb_dap; diff --git a/lldb/tools/lldb-dap/BreakpointBase.h b/lldb/tools/lldb-dap/BreakpointBase.h index 5a04bb201615fc8..79301480e0e5888 100644 --- a/lldb/tools/lldb-dap/BreakpointBase.h +++ b/lldb/tools/lldb-dap/BreakpointBase.h @@ -9,10 +9,8 @@ #ifndef LLDB_TOOLS_LLDB_DAP_BREAKPOINTBASE_H #define LLDB_TOOLS_LLDB_DAP_BREAKPOINTBASE_H -#include "lldb/API/SBBreakpoint.h" #include "llvm/Support/JSON.h" #include -#include namespace lldb_dap { diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp index 68559e382006db8..283392270ba26c9 100644 --- a/lldb/tools/lldb-dap/DAP.cpp +++ b/lldb/tools/lldb-dap/DAP.cpp @@ -10,11 +10,14 @@ #include #include #include -#include #include "DAP.h" +#include "JSONUtils.h" #include "LLDBUtils.h" #include "lldb/API/SBCommandInterpreter.h" +#include "lldb/API/SBLanguageRuntime.h" +#include "lldb/API/SBListener.h" +#include "lldb/API/SBStream.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/FormatVariadic.h" 
diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h index acc10ade75fd147..dab4ce44ab202cb 100644 --- a/lldb/tools/lldb-dap/DAP.h +++ b/lldb/tools/lldb-dap/DAP.h @@ -9,16 +9,10 @@ #ifndef LLDB_TOOLS_LLDB_DAP_DAP_H #define LLDB_TOOLS_LLDB_DAP_DAP_H -#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX - -#include -#include #include -#include #include #include #include -#include #include #include "llvm/ADT/DenseMap.h" @@ -30,24 +24,12 @@ #include "llvm/Support/raw_ostream.h" #include "lldb/API/SBAttachInfo.h" -#include "lldb/API/SBBreakpoint.h" -#include "lldb/API/SBBreakpointLocation.h" #include "lldb/API/SBCommandInterpreter.h" #include "lldb/API/SBCommandReturnObject.h" -#include "lldb/API/SBCommunication.h" #include "lldb/API/SBDebugger.h" #include "lldb/API/SBEvent.h" #include "lldb/API/SBFormat.h" -#include "lldb/API/SBHostOS.h" -#include "lldb/API/SBInstruction.h" -#include "lldb/API/SBInstructionList.h" -#include "lldb/API/SBLanguageRuntime.h" #include "lldb/API/SBLaunchInfo.h" -#include "lldb/API/SBLineEntry.h" -#include "lldb/API/SBListener.h" -#include "lldb/API/SBProcess.h" -#include "lldb/API/SBStream.h" -#include "lldb/API/SBStringList.h" #include "lldb/API/SBTarget.h" #include "lldb/API/SBThread.h" @@ -56,7 +38,6 @@ #include "IOStream.h" #include "InstructionBreakpoint.h" #include "ProgressEvent.h" -#include "RunInTerminal.h" #include "SourceBreakpoint.h" #define VARREF_LOCALS (int64_t)1 diff --git a/lldb/tools/lldb-dap/FifoFiles.cpp b/lldb/tools/lldb-dap/FifoFiles.cpp index 9a6423f79471a40..1f1bba80bd3b113 100644 --- a/lldb/tools/lldb-dap/FifoFiles.cpp +++ b/lldb/tools/lldb-dap/FifoFiles.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "FifoFiles.h" +#include "JSONUtils.h" #if !defined(_WIN32) #include @@ -18,11 +19,6 @@ #include #include #include -#include - -#include "llvm/Support/FileSystem.h" - -#include "lldb/lldb-defines.h" using namespace llvm; diff --git 
a/lldb/tools/lldb-dap/FifoFiles.h b/lldb/tools/lldb-dap/FifoFiles.h index 02a97cd5cbbd23c..633ebeb2aedd45b 100644 --- a/lldb/tools/lldb-dap/FifoFiles.h +++ b/lldb/tools/lldb-dap/FifoFiles.h @@ -9,10 +9,8 @@ #ifndef LLDB_TOOLS_LLDB_DAP_FIFOFILES_H #define LLDB_TOOLS_LLDB_DAP_FIFOFILES_H -#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/Support/Error.h" - -#include "JSONUtils.h" +#include "llvm/Support/JSON.h" #include diff --git a/lldb/tools/lldb-dap/FunctionBreakpoint.cpp b/lldb/tools/lldb-dap/FunctionBreakpoint.cpp index 21743bf908706d0..216c685f633da80 100644 --- a/lldb/tools/lldb-dap/FunctionBreakpoint.cpp +++ b/lldb/tools/lldb-dap/FunctionBreakpoint.cpp @@ -8,6 +8,7 @@ #include "FunctionBreakpoint.h" #include "DAP.h" +#include "JSONUtils.h" namespace lldb_dap { diff --git a/lldb/tools/lldb-dap/IOStream.cpp b/lldb/tools/lldb-dap/IOStream.cpp index 96e9a1ed49532f8..d2e8ec40b0a7b85 100644 --- a/lldb/tools/lldb-dap/IOStream.cpp +++ b/lldb/tools/lldb-dap/IOStream.cpp @@ -18,7 +18,6 @@ #include #include -#include using namespace lldb_dap; diff --git a/lldb/tools/lldb-dap/IOStream.h b/lldb/tools/lldb-dap/IOStream.h index b62502419182cd3..57d5fd458b7165d 100644 --- a/lldb/tools/lldb-dap/IOStream.h +++ b/lldb/tools/lldb-dap/IOStream.h @@ -9,8 +9,6 @@ #ifndef LLDB_TOOLS_LLDB_DAP_IOSTREAM_H #define LLDB_TOOLS_LLDB_DAP_IOSTREAM_H -#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX - #if defined(_WIN32) // We need to #define NOMINMAX in order to skip `min()` and `max()` macro // definitions that conflict with other system headers. 
diff --git a/lldb/tools/lldb-dap/InstructionBreakpoint.cpp b/lldb/tools/lldb-dap/InstructionBreakpoint.cpp index de4f6f5d86717f6..e3a8460bb7b3014 100644 --- a/lldb/tools/lldb-dap/InstructionBreakpoint.cpp +++ b/lldb/tools/lldb-dap/InstructionBreakpoint.cpp @@ -9,6 +9,7 @@ #include "InstructionBreakpoint.h" #include "DAP.h" +#include "JSONUtils.h" namespace lldb_dap { diff --git a/lldb/tools/lldb-dap/InstructionBreakpoint.h b/lldb/tools/lldb-dap/InstructionBreakpoint.h index cf1516f46e9551f..53912af46ca1480 100644 --- a/lldb/tools/lldb-dap/InstructionBreakpoint.h +++ b/lldb/tools/lldb-dap/InstructionBreakpoint.h @@ -11,7 +11,6 @@ #define LLDB_TOOLS_LLDB_DAP_INSTRUCTIONBREAKPOINT_H #include "Breakpoint.h" -#include "llvm/ADT/StringRef.h" namespace lldb_dap { diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index e42a6d9d6998045..fd09e4ae505e6d2 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -6,25 +6,21 @@ // //===----------------------------------------------------------------------===// -#include #include #include #include #include #include "llvm/ADT/StringRef.h" -#include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" -#include "lldb/API/SBBreakpoint.h" -#include "lldb/API/SBBreakpointLocation.h" #include "lldb/API/SBDeclaration.h" +#include "lldb/API/SBStream.h" #include "lldb/API/SBStringList.h" #include "lldb/API/SBStructuredData.h" #include "lldb/API/SBValue.h" -#include "lldb/Host/PosixApi.h" #include "DAP.h" #include "ExceptionBreakpoint.h" diff --git a/lldb/tools/lldb-dap/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp index b38833c0fdb6b6f..2ffcba7dff4f248 100644 --- a/lldb/tools/lldb-dap/LLDBUtils.cpp +++ b/lldb/tools/lldb-dap/LLDBUtils.cpp @@ -8,6 +8,8 @@ #include "LLDBUtils.h" #include "DAP.h" +#include "JSONUtils.h" +#include "lldb/API/SBStringList.h" #include diff --git 
a/lldb/tools/lldb-dap/OutputRedirector.h b/lldb/tools/lldb-dap/OutputRedirector.h index dba51016775bf45..e26d1648b104f9d 100644 --- a/lldb/tools/lldb-dap/OutputRedirector.h +++ b/lldb/tools/lldb-dap/OutputRedirector.h @@ -9,8 +9,6 @@ #ifndef LLDB_TOOLS_LLDB_DAP_OUTPUT_REDIRECTOR_H #define LLDB_TOOLS_LLDB_DAP_OUTPUT_REDIRECTOR_H -#include - #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" diff --git a/lldb/tools/lldb-dap/RunInTerminal.cpp b/lldb/tools/lldb-dap/RunInTerminal.cpp index ad019b8a56a4fa6..4fe09e2885a8e5e 100644 --- a/lldb/tools/lldb-dap/RunInTerminal.cpp +++ b/lldb/tools/lldb-dap/RunInTerminal.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "RunInTerminal.h" +#include "JSONUtils.h" #if !defined(_WIN32) #include @@ -15,14 +16,10 @@ #endif #include -#include #include -#include #include "llvm/Support/FileSystem.h" -#include "lldb/lldb-defines.h" - using namespace llvm; namespace lldb_dap { diff --git a/lldb/tools/lldb-dap/RunInTerminal.h b/lldb/tools/lldb-dap/RunInTerminal.h index 2fbe3acbb408427..b20f8beb6071dd9 100644 --- a/lldb/tools/lldb-dap/RunInTerminal.h +++ b/lldb/tools/lldb-dap/RunInTerminal.h @@ -10,9 +10,11 @@ #define LLDB_TOOLS_LLDB_DAP_RUNINTERMINAL_H #include "FifoFiles.h" +#include "lldb/API/SBError.h" #include -#include +#include +#include namespace lldb_dap { diff --git a/lldb/tools/lldb-dap/SourceBreakpoint.cpp b/lldb/tools/lldb-dap/SourceBreakpoint.cpp index f5dd1346cb9e543..d1a3a5bedb0ae29 100644 --- a/lldb/tools/lldb-dap/SourceBreakpoint.cpp +++ b/lldb/tools/lldb-dap/SourceBreakpoint.cpp @@ -8,6 +8,7 @@ #include "SourceBreakpoint.h" #include "DAP.h" +#include "JSONUtils.h" namespace lldb_dap { diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp index f70b0d3d4cbee07..a2f7be2b214e4ae 100644 --- a/lldb/tools/lldb-dap/lldb-dap.cpp +++ b/lldb/tools/lldb-dap/lldb-dap.cpp @@ -7,9 +7,14 @@ 
//===----------------------------------------------------------------------===// #include "DAP.h" +#include "FifoFiles.h" +#include "RunInTerminal.h" #include "Watchpoint.h" #include "lldb/API/SBDeclaration.h" +#include "lldb/API/SBInstruction.h" +#include "lldb/API/SBListener.h" #include "lldb/API/SBMemoryRegionInfo.h" +#include "lldb/API/SBStringList.h" #include "llvm/Support/Base64.h" #include @@ -43,17 +48,12 @@ #include #include -#include -#include #include #include -#include #include -#include #include #include -#include "lldb/API/SBEnvironment.h" #include "lldb/API/SBStream.h" #include "lldb/Host/Config.h" #include "llvm/ADT/ArrayRef.h" From e6fcf349eff5be326e1923567646edf99f69d297 Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Mon, 28 Oct 2024 18:28:52 +0800 Subject: [PATCH 151/425] [clang-tidy] Remove unused variable (NFC) /llvm-project/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp:60:29: error: unused variable 'AlgoName' [-Werror,-Wunused-variable] const llvm::StringRef AlgoName = ClassTemplate->getName(); ^ 1 error generated. 
--- .../bugprone/NondeterministicPointerIterationOrderCheck.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp index 0797376d0aa9ff8..22ecd689614696f 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp @@ -57,7 +57,6 @@ void NondeterministicPointerIterationOrderCheck::check( "recorddecl")) { const TemplateArgumentList &TemplateArgs = ClassTemplate->getTemplateArgs(); - const llvm::StringRef AlgoName = ClassTemplate->getName(); const bool IsAlgoArgPointer = TemplateArgs[0].getAsType()->isPointerType(); From f5d8a485e216996b076369b4d66c41a70fe34ea4 Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Mon, 28 Oct 2024 18:30:21 +0800 Subject: [PATCH 152/425] [RISCV] Fix typo in UserGuides.rst. NFC (#113861) --- llvm/docs/UserGuides.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst index 86101ffbd9ca5dd..0b204d512876a37 100644 --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -286,7 +286,7 @@ Additional Topics DirectX runtime. :doc:`RISCVUsage` - This document describes using the RISCV-V target. + This document describes using the RISC-V target. :doc:`RISCV/RISCVVectorExtension` This document describes how the RISC-V Vector extension can be expressed in LLVM IR and how code is generated for it in the backend. From 09160a98218e7f1038e06c5d8e704a826ed0ae13 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Mon, 28 Oct 2024 11:32:39 +0100 Subject: [PATCH 153/425] [SystemZ] Silence compiler warning (#113894) Use SystemZ::NoRegister instead of 0 in SystemZTargetLowering::getRegisterByName(). 
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 3e05f3b0180a782..1fa2dbfb26fc25a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1400,9 +1400,11 @@ SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const { Register Reg = StringSwitch(RegName) - .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0) - .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0) - .Default(0); + .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D + : SystemZ::NoRegister) + .Case("r15", + Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister) + .Default(SystemZ::NoRegister); if (Reg) return Reg; From 64148944c56f50a50383c79d5b6d074d8608f2a4 Mon Sep 17 00:00:00 2001 From: SpencerAbson Date: Mon, 28 Oct 2024 10:41:07 +0000 Subject: [PATCH 154/425] [AArch64] Add assembly/disassembly for zeroing SVE2 integer instructions (#113473) This patch adds assembly/disassembly for the following SVE2.2 instructions - SQABS (zeroing) - SQNEG (zeroing) - URECPE (zeroing) - URSQRTE (zeroing) - Refactor the existing merging forms to remove the now redundant bit 17 argument. 
- In accordance with: https://developer.arm.com/documentation/ddi0602/latest/ --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 14 +++-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 52 ++++++++++++++----- llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s | 2 +- llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s | 2 +- .../test/MC/AArch64/SVE2/urecpe-diagnostics.s | 2 +- .../MC/AArch64/SVE2/ursqrte-diagnostics.s | 2 +- .../MC/AArch64/SVE2p2/sqabs_z-diagnostics.s | 37 +++++++++++++ llvm/test/MC/AArch64/SVE2p2/sqabs_z.s | 39 ++++++++++++++ .../MC/AArch64/SVE2p2/sqneg_z-diagnostics.s | 37 +++++++++++++ llvm/test/MC/AArch64/SVE2p2/sqneg_z.s | 39 ++++++++++++++ .../MC/AArch64/SVE2p2/urecpe_z-diagnostics.s | 47 +++++++++++++++++ llvm/test/MC/AArch64/SVE2p2/urecpe_z.s | 33 ++++++++++++ .../MC/AArch64/SVE2p2/ursqrte_z-diagnostics.s | 47 +++++++++++++++++ llvm/test/MC/AArch64/SVE2p2/ursqrte_z.s | 33 ++++++++++++ 14 files changed, 366 insertions(+), 20 deletions(-) create mode 100644 llvm/test/MC/AArch64/SVE2p2/sqabs_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/sqabs_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/sqneg_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/sqneg_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/urecpe_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/urecpe_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/ursqrte_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/ursqrte_z.s diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index bf6e25438633c7d..dc96b249c4e40ce 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3586,10 +3586,10 @@ let Predicates = [HasSVE2orSME] in { defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>; // SVE2 integer unary operations (predicated) - defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe", 
int_aarch64_sve_urecpe>; - defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte", int_aarch64_sve_ursqrte>; - defm SQABS_ZPmZ : sve2_int_un_pred_arit<0b100, "sqabs", int_aarch64_sve_sqabs>; - defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg", int_aarch64_sve_sqneg>; + defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b00, "urecpe", int_aarch64_sve_urecpe>; + defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b01, "ursqrte", int_aarch64_sve_ursqrte>; + defm SQABS_ZPmZ : sve2_int_un_pred_arit< 0b10, "sqabs", int_aarch64_sve_sqabs>; + defm SQNEG_ZPmZ : sve2_int_un_pred_arit< 0b11, "sqneg", int_aarch64_sve_sqneg>; // SVE2 saturating add/subtract defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", int_aarch64_sve_sqadd>; @@ -4236,6 +4236,12 @@ let Predicates = [HasSVE2p2orSME2p2] in { // Signed integer base 2 logarithm of fp value, zeroing predicate defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">; + // SVE2 integer unary operations, zeroing predicate + def URECPE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b00, "urecpe", ZPR32>; + def URSQRTE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b01, "ursqrte", ZPR32>; + defm SQABS_ZPzZ : sve2_int_un_pred_arit_z<0b10, "sqabs">; + defm SQNEG_ZPzZ : sve2_int_un_pred_arit_z<0b11, "sqneg">; + // Floating point round to integral fp value in integer size range // Merging defm FRINT32Z_ZPmZ : sve_fp_2op_p_zd_frint<0b00, "frint32z">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index af5c96eb5c8c561..02ee0fe9244572c 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3964,7 +3964,7 @@ multiclass sve2_int_sadd_long_accum_pairwise(NAME # _D)>; } -class sve2_int_un_pred_arit sz, bit Q, bits<2> opc, +class sve2_int_un_pred_arit sz, bits<2> opc, string asm, ZPRRegOp zprty> : I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), asm, "\t$Zd, $Pg/m, $Zn", @@ -3976,23 +3976,44 @@ class sve2_int_un_pred_arit sz, bit Q, bits<2> 
opc, let Inst{31-24} = 0b01000100; let Inst{23-22} = sz; let Inst{21-20} = 0b00; - let Inst{19} = Q; - let Inst{18} = 0b0; - let Inst{17-16} = opc; + let Inst{19} = opc{1}; + let Inst{18-17} = 0b00; + let Inst{16} = opc{0}; let Inst{15-13} = 0b101; let Inst{12-10} = Pg; let Inst{9-5} = Zn; let Inst{4-0} = Zd; - let Constraints = "$Zd = $_Zd"; let DestructiveInstType = DestructiveUnaryPassthru; let ElementSize = zprty.ElementSize; let hasSideEffects = 0; } -multiclass sve2_int_un_pred_arit_s opc, string asm, +class sve2_int_un_pred_arit_z sz, bits<2> opc, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg/z, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b01000100; + let Inst{23-22} = sz; + let Inst{21-20} = 0b00; + let Inst{19} = opc{1}; + let Inst{18-17} = 0b01; + let Inst{16} = opc{0}; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + let hasSideEffects = 0; +} + +multiclass sve2_int_un_pred_arit_s opc, string asm, SDPatternOperator op> { - def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>, + def _S : sve2_int_un_pred_arit<0b10, opc, asm, ZPR32>, SVEPseudo2Instr; def : SVE_3_Op_Pat(NAME # _S)>; @@ -4002,14 +4023,14 @@ multiclass sve2_int_un_pred_arit_s opc, string asm, defm : SVE_3_Op_Undef_Pat(NAME # _S_UNDEF)>; } -multiclass sve2_int_un_pred_arit opc, string asm, SDPatternOperator op> { - def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>, +multiclass sve2_int_un_pred_arit opc, string asm, SDPatternOperator op> { + def _B : sve2_int_un_pred_arit<0b00, opc, asm, ZPR8>, SVEPseudo2Instr; - def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>, + def _H : sve2_int_un_pred_arit<0b01, opc, asm, ZPR16>, SVEPseudo2Instr; - def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>, + def _S : sve2_int_un_pred_arit<0b10, opc, asm, ZPR32>, SVEPseudo2Instr; - def _D : 
sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>, + def _D : sve2_int_un_pred_arit<0b11, opc, asm, ZPR64>, SVEPseudo2Instr; def : SVE_3_Op_Pat(NAME # _B)>; @@ -4028,6 +4049,13 @@ multiclass sve2_int_un_pred_arit opc, string asm, SDPatternOperator op> defm : SVE_3_Op_Undef_Pat(NAME # _D_UNDEF)>; } +multiclass sve2_int_un_pred_arit_z opc, string asm> { + def _B : sve2_int_un_pred_arit_z<0b00, opc, asm, ZPR8>; + def _H : sve2_int_un_pred_arit_z<0b01, opc, asm, ZPR16>; + def _S : sve2_int_un_pred_arit_z<0b10, opc, asm, ZPR32>; + def _D : sve2_int_un_pred_arit_z<0b11, opc, asm, ZPR64>; +} + //===----------------------------------------------------------------------===// // SVE2 Widening Integer Arithmetic Group //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s index 8b3a136a7d74293..7dd268dd7cddd19 100644 --- a/llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s @@ -4,7 +4,7 @@ // Invalid predicate sqabs z0.s, p0/z, z1.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: sqabs z0.s, p0/z, z1.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s index 7b0f5722b94a3fa..372adad0427c092 100644 --- a/llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s @@ -4,7 +4,7 @@ // Invalid predicate sqneg z0.s, p0/z, z1.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: sqneg z0.s, p0/z, z1.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2/urecpe-diagnostics.s 
b/llvm/test/MC/AArch64/SVE2/urecpe-diagnostics.s index f04538494cd6f05..73bb6cecffa5a41 100644 --- a/llvm/test/MC/AArch64/SVE2/urecpe-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/urecpe-diagnostics.s @@ -4,7 +4,7 @@ // Invalid predicate urecpe z0.s, p0/z, z1.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: urecpe z0.s, p0/z, z1.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2/ursqrte-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ursqrte-diagnostics.s index 2190ff1ebd82ba4..0c6746a4a7c1fe3 100644 --- a/llvm/test/MC/AArch64/SVE2/ursqrte-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/ursqrte-diagnostics.s @@ -4,7 +4,7 @@ // Invalid predicate ursqrte z0.s, p0/z, z1.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: ursqrte z0.s, p0/z, z1.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/sqabs_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/sqabs_z-diagnostics.s new file mode 100644 index 000000000000000..8c00a74c69c8c63 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/sqabs_z-diagnostics.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid element width + +sqabs z31.b, p7/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sqabs z31.b, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqabs z31.d, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sqabs z31.d, p7/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +sqabs z31.b, p8/z, z31.b +// 
CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: sqabs z31.b, p8/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.h, p0/z, z7.h +sqabs z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: sqabs z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +sqabs z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: sqabs z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/sqabs_z.s b/llvm/test/MC/AArch64/SVE2p2/sqabs_z.s new file mode 100644 index 000000000000000..c583423fac6eba9 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/sqabs_z.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +sqabs z0.b, p0/z, z0.b // 01000100-00001010-10100000-00000000 +// CHECK-INST: sqabs z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x0a,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 440aa000 + +sqabs z21.h, p5/z, z10.h // 01000100-01001010-10110101-01010101 +// CHECK-INST: sqabs z21.h, p5/z, z10.h +// CHECK-ENCODING: [0x55,0xb5,0x4a,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 444ab555 + +sqabs z23.s, p3/z, z13.s // 01000100-10001010-10101101-10110111 +// CHECK-INST: sqabs z23.s, p3/z, z13.s +// CHECK-ENCODING: [0xb7,0xad,0x8a,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 448aadb7 + +sqabs z31.d, p7/z, z31.d // 01000100-11001010-10111111-11111111 +// CHECK-INST: sqabs z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xca,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 44cabfff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/sqneg_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/sqneg_z-diagnostics.s new file mode 100644 index 000000000000000..576633c0ab1b64a --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/sqneg_z-diagnostics.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid element width + +sqneg z31.b, p7/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sqneg z31.b, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqneg z31.d, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sqneg z31.d, p7/z, z31.s 
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +sqneg z31.b, p8/z, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: sqneg z31.b, p8/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.h, p0/z, z7.h +sqneg z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: sqneg z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +sqneg z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: sqneg z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/sqneg_z.s b/llvm/test/MC/AArch64/SVE2p2/sqneg_z.s new file mode 100644 index 000000000000000..287211fd3ff5dd9 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/sqneg_z.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// 
Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +sqneg z0.b, p0/z, z0.b // 01000100-00001011-10100000-00000000 +// CHECK-INST: sqneg z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x0b,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 440ba000 + +sqneg z23.h, p3/z, z13.h // 01000100-01001011-10101101-10110111 +// CHECK-INST: sqneg z23.h, p3/z, z13.h +// CHECK-ENCODING: [0xb7,0xad,0x4b,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 444badb7 + +sqneg z21.s, p5/z, z10.s // 01000100-10001011-10110101-01010101 +// CHECK-INST: sqneg z21.s, p5/z, z10.s +// CHECK-ENCODING: [0x55,0xb5,0x8b,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 448bb555 + +sqneg z31.d, p7/z, z31.d // 01000100-11001011-10111111-11111111 +// CHECK-INST: sqneg z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xcb,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 44cbbfff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/urecpe_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/urecpe_z-diagnostics.s new file mode 100644 index 000000000000000..9da2a7e096f3af4 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/urecpe_z-diagnostics.s @@ -0,0 +1,47 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid element width + +urecpe z31.b, p7/z, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: urecpe z31.b, p7/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +urecpe z31.h, p7/z, z31.h +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: urecpe z31.h, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +urecpe z31.s, p7/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: urecpe z31.s, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +urecpe z31.d, p7/z, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: urecpe z31.d, p7/z, z31.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +urecpe z0.s, p8/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: urecpe z0.s, p8/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.s, p0/z, z7.s +urecpe z0.s, p0/z, z3.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: urecpe z0.s, p0/z, z3.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +urecpe z0.s, p0/z, z3.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: urecpe z0.s, p0/z, z3.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/urecpe_z.s b/llvm/test/MC/AArch64/SVE2p2/urecpe_z.s new file mode 100644 index 000000000000000..6b3ec79c988690a --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/urecpe_z.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +urecpe z0.s, p0/z, z0.s // 01000100-10000010-10100000-00000000 +// CHECK-INST: urecpe z0.s, p0/z, z0.s +// CHECK-ENCODING: [0x00,0xa0,0x82,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 4482a000 + +urecpe z23.s, p3/z, z13.s // 01000100-10000010-10101101-10110111 +// CHECK-INST: urecpe z23.s, p3/z, z13.s +// CHECK-ENCODING: [0xb7,0xad,0x82,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 4482adb7 + +urecpe z31.s, p7/z, z31.s // 01000100-10000010-10111111-11111111 +// CHECK-INST: urecpe z31.s, p7/z, z31.s +// CHECK-ENCODING: [0xff,0xbf,0x82,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 4482bfff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/ursqrte_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/ursqrte_z-diagnostics.s new file mode 100644 index 000000000000000..1b6cf8d78e655ef --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/ursqrte_z-diagnostics.s @@ -0,0 +1,47 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// 
Invalid element width + +ursqrte z31.b, p7/z, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: ursqrte z31.b, p7/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ursqrte z31.h, p7/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: ursqrte z31.h, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ursqrte z31.s, p7/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: ursqrte z31.s, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ursqrte z31.d, p7/z, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: ursqrte z31.d, p7/z, z31.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +ursqrte z0.s, p8/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: ursqrte z0.s, p8/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.s, p0/z, z7.s +ursqrte z0.s, p0/z, z3.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: ursqrte z0.s, p0/z, z3.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +ursqrte z0.s, p0/z, z3.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: ursqrte z0.s, p0/z, z3.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/ursqrte_z.s b/llvm/test/MC/AArch64/SVE2p2/ursqrte_z.s new file mode 100644 index 000000000000000..097d82bebd430b4 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/ursqrte_z.s @@ -0,0 +1,33 @@ 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ursqrte z0.s, p0/z, z0.s // 01000100-10000011-10100000-00000000 +// CHECK-INST: ursqrte z0.s, p0/z, z0.s +// CHECK-ENCODING: [0x00,0xa0,0x83,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 4483a000 + +ursqrte z21.s, p5/z, z10.s // 01000100-10000011-10110101-01010101 +// CHECK-INST: ursqrte z21.s, p5/z, z10.s +// CHECK-ENCODING: [0x55,0xb5,0x83,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 4483b555 + +ursqrte z31.s, p7/z, z31.s // 01000100-10000011-10111111-11111111 +// CHECK-INST: ursqrte z31.s, p7/z, z31.s +// CHECK-ENCODING: [0xff,0xbf,0x83,0x44] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 4483bfff \ No newline at end of file From f5ff3a560fe247206814792a7f8efef8215f7689 Mon Sep 17 00:00:00 2001 From: Discookie Date: Mon, 28 Oct 2024 11:19:22 +0000 Subject: [PATCH 155/425] [clang-tidy] Do not emit file path for anonymous enums in 
`readability-enum-initial-value` check (#112496) Previously the name of anonymous enums in the check were `enum 'enum (unnamed at /full/path/to/file.c:1:1)'`, which breaks reproducibility of clang-tidy reports when the analyzed project is in a different folder. --- .../readability/EnumInitialValueCheck.cpp | 24 ++++++++++++------- clang-tools-extra/docs/ReleaseNotes.rst | 3 ++- .../checkers/readability/enum-initial-value.c | 12 +++++++++- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp index 1cb95c2b2347b76..e0b9939681794f7 100644 --- a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp @@ -123,6 +123,13 @@ AST_MATCHER(EnumDecl, hasSequentialInitialValues) { return !AllEnumeratorsArePowersOfTwo; } +std::string getName(const EnumDecl *Decl) { + if (!Decl->getDeclName()) + return ""; + + return Decl->getQualifiedNameAsString(); +} + } // namespace EnumInitialValueCheck::EnumInitialValueCheck(StringRef Name, @@ -160,10 +167,11 @@ void EnumInitialValueCheck::registerMatchers(MatchFinder *Finder) { void EnumInitialValueCheck::check(const MatchFinder::MatchResult &Result) { if (const auto *Enum = Result.Nodes.getNodeAs("inconsistent")) { DiagnosticBuilder Diag = - diag(Enum->getBeginLoc(), - "initial values in enum %0 are not consistent, consider explicit " - "initialization of all, none or only the first enumerator") - << Enum; + diag( + Enum->getBeginLoc(), + "initial values in enum '%0' are not consistent, consider explicit " + "initialization of all, none or only the first enumerator") + << getName(Enum); for (const EnumConstantDecl *ECD : Enum->enumerators()) if (ECD->getInitExpr() == nullptr) { const SourceLocation EndLoc = Lexer::getLocForEndOfToken( @@ -183,16 +191,16 @@ void EnumInitialValueCheck::check(const 
MatchFinder::MatchResult &Result) { if (Loc.isInvalid() || Loc.isMacroID()) return; DiagnosticBuilder Diag = diag(Loc, "zero initial value for the first " - "enumerator in %0 can be disregarded") - << Enum; + "enumerator in '%0' can be disregarded") + << getName(Enum); cleanInitialValue(Diag, ECD, *Result.SourceManager, getLangOpts()); return; } if (const auto *Enum = Result.Nodes.getNodeAs("sequential")) { DiagnosticBuilder Diag = diag(Enum->getBeginLoc(), - "sequential initial value in %0 can be ignored") - << Enum; + "sequential initial value in '%0' can be ignored") + << getName(Enum); for (const EnumConstantDecl *ECD : llvm::drop_begin(Enum->enumerators())) cleanInitialValue(Diag, ECD, *Result.SourceManager, getLangOpts()); return; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 7eb2ee511a05f53..54118e5f92f4173 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -249,7 +249,8 @@ Changes in existing checks - Improved :doc:`readability-enum-initial-value ` check by only issuing - diagnostics for the definition of an ``enum``, and by fixing a typo in the + diagnostics for the definition of an ``enum``, by not emitting a redundant + file path for anonymous enums in the diagnostic, and by fixing a typo in the diagnostic. 
- Improved :doc:`readability-implicit-bool-conversion diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c b/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c index b9a34d0683d7f30..54108585f030f87 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c @@ -53,6 +53,17 @@ enum EMacro2 { // CHECK-FIXES: EMacro2_c = 3, }; + +enum { + // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: initial values in enum '' are not consistent + // CHECK-MESSAGES-ENABLE: :[[@LINE-2]]:1: warning: initial values in enum '' are not consistent + EAnonymous_a = 1, + EAnonymous_b, + // CHECK-FIXES: EAnonymous_b = 2, + EAnonymous_c = 3, +}; + + enum EnumZeroFirstInitialValue { EnumZeroFirstInitialValue_0 = 0, // CHECK-MESSAGES-ENABLE: :[[@LINE-1]]:3: warning: zero initial value for the first enumerator in 'EnumZeroFirstInitialValue' can be disregarded @@ -114,4 +125,3 @@ enum WithFwdDeclSequential : int { EFS2 = 4, // CHECK-FIXES-ENABLE: EFS2 , }; - From 1df5c943439b050dbc34e7a1c88c4d8ec90d26b5 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 28 Oct 2024 11:22:38 +0000 Subject: [PATCH 156/425] [AArch64] Implement FP8 floating-point mode helper intrinsics (#100608) Implement FP8 mode helper intrinsics (as inline functions) as specified in ACLE 2024Q3 "14.2 Helper intrinsics" https://github.com/ARM-software/acle/releases/download/r2024Q3/acle-2024Q3.pdf --- clang/test/CodeGen/aarch64-fpm-helpers.c | 165 +++++++++++++++++++++++ clang/utils/TableGen/NeonEmitter.cpp | 54 ++++++++ 2 files changed, 219 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-fpm-helpers.c diff --git a/clang/test/CodeGen/aarch64-fpm-helpers.c b/clang/test/CodeGen/aarch64-fpm-helpers.c new file mode 100644 index 000000000000000..4bced01d5c71fa9 --- /dev/null +++ b/clang/test/CodeGen/aarch64-fpm-helpers.c @@ -0,0 +1,165 @@ +// NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 + +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_NEON_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_SVE_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_SME_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_NEON_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SVE_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SME_H %s -o - | FileCheck %s + +// REQUIRES: aarch64-registered-target + +#ifdef USE_NEON_H +#include "arm_neon.h" +#endif + +#ifdef USE_SVE_H +#include "arm_sve.h" +#endif + +#ifdef USE_SME_H +#include "arm_sme.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define INIT_ZERO 0 +#define INIT_ONES 0xffffffffffffffffU + +// CHECK-LABEL: define dso_local noundef i64 @test_init( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 0 +// +fpm_t test_init() { return __arm_fpm_init(); } + +// CHECK-LABEL: define dso_local noundef i64 @test_src1_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 -8 +// +fpm_t test_src1_1() { + return __arm_set_fpm_src1_format(INIT_ONES, __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_src1_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 1 +// +fpm_t test_src1_2() { + return __arm_set_fpm_src1_format(INIT_ZERO, __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_src2_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 -57 +// +fpm_t test_src2_1() { + return __arm_set_fpm_src2_format(INIT_ONES, __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local 
noundef i64 @test_src2_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 8 +// +fpm_t test_src2_2() { + return __arm_set_fpm_src2_format(INIT_ZERO, __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_dst1_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 -449 +// +fpm_t test_dst1_1() { + return __arm_set_fpm_dst_format(INIT_ONES, __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_dst2_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 64 +// +fpm_t test_dst2_2() { + return __arm_set_fpm_dst_format(INIT_ZERO, __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 -16385 +// +fpm_t test_of_mul_1() { + return __arm_set_fpm_overflow_mul(INIT_ONES, __ARM_FPM_INFNAN); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 16384 +// +fpm_t test_of_mul_2() { + return __arm_set_fpm_overflow_mul(INIT_ZERO, __ARM_FPM_SATURATE); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 -32769 +// +fpm_t test_of_cvt_1() { + return __arm_set_fpm_overflow_cvt(INIT_ONES, __ARM_FPM_INFNAN); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 32768 +// +fpm_t test_of_cvt_2() { + return __arm_set_fpm_overflow_cvt(INIT_ZERO, __ARM_FPM_SATURATE); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_lscale( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// 
CHECK-NEXT: ret i64 8323072 +// +fpm_t test_lscale() { return __arm_set_fpm_lscale(INIT_ZERO, 127); } + +// CHECK-LABEL: define dso_local noundef i64 @test_lscale2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 270582939648 +// +fpm_t test_lscale2() { return __arm_set_fpm_lscale2(INIT_ZERO, 63); } + +// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 2147483648 +// +fpm_t test_nscale_1() { return __arm_set_fpm_nscale(INIT_ZERO, -128); } + +// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 2130706432 +// +fpm_t test_nscale_2() { return __arm_set_fpm_nscale(INIT_ZERO, 127); } + +// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret i64 4278190080 +// +fpm_t test_nscale_3() { return __arm_set_fpm_nscale(INIT_ZERO, -1); } + +#ifdef __cplusplus +} +#endif diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index 8caba774649a2b4..c6d82646b40de24 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -2594,6 +2594,60 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) { OS << "typedef double float64_t;\n"; OS << "#endif\n\n"; + OS << R"( +typedef uint64_t fpm_t; + +enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 }; + +enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE }; + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_fpm_init(void) { + return 0; +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_src1_format(fpm_t __fpm, enum 
__ARM_FPM_FORMAT __format) { + return (__fpm & ~7ull) | (fpm_t)__format; +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { + return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { + return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { + return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { + return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) { + return (__fpm & ~0x7f0000ull) | (__scale << 16u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) { + return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) { + return (uint32_t)__fpm | (__scale << 32u); +} + +)"; + emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQd", OS); emitNeonTypeDefs("bQb", OS); From 0cbccb13d6757b0ea7f2a7f29bb598e1935bcf37 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Oct 2024 11:31:18 +0000 Subject: [PATCH 157/425] [RISCV] Remove support for pre-RA vsetvli insertion (#110796) Now that LLVM 19.1.0 has been out for a while with post-vector-RA vsetvli insertion enabled by default, this proposes to remove the flag that restores the old pre-RA behaviour so we only 
have one configuration going forward. That flag was mainly meant as a fallback in case users ran into issues, but I haven't seen anything reported so far. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 11 +- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 20 +- llvm/test/CodeGen/RISCV/rvv/remat.ll | 468 +++++-------------- 3 files changed, 133 insertions(+), 366 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 3989a966edfd333..a3963fadf3e417a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -179,17 +179,10 @@ bool RISCVInstrInfo::isReallyTriviallyReMaterializable( case RISCV::VMV_S_X: case RISCV::VFMV_S_F: case RISCV::VID_V: - if (MI.getOperand(1).isUndef() && - /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl - and vtype. Make sure we only rematerialize before RISCVInsertVSETVLI - i.e. -riscv-vsetvl-after-rvv-regalloc=true */ - !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE)) - return true; - break; + return MI.getOperand(1).isUndef(); default: - break; + return TargetInstrInfo::isReallyTriviallyReMaterializable(MI); } - return TargetInstrInfo::isReallyTriviallyReMaterializable(MI); } static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg, diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 089dc6c529193de..72d74d2d79b1d5a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -99,11 +99,6 @@ static cl::opt EnableMISchedLoadStoreClustering( cl::desc("Enable load and store clustering in the machine scheduler"), cl::init(true)); -static cl::opt EnableVSETVLIAfterRVVRegAlloc( - "riscv-vsetvl-after-rvv-regalloc", cl::Hidden, - cl::desc("Insert vsetvls after vector register allocation"), - cl::init(true)); - static cl::opt EnableVLOptimizer("riscv-enable-vl-optimizer", cl::desc("Enable the RISC-V 
VL Optimizer pass"), @@ -413,8 +408,7 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) { bool RISCVPassConfig::addRegAssignAndRewriteFast() { addPass(createRVVRegAllocPass(false)); - if (EnableVSETVLIAfterRVVRegAlloc) - addPass(createRISCVInsertVSETVLIPass()); + addPass(createRISCVInsertVSETVLIPass()); if (TM->getOptLevel() != CodeGenOptLevel::None && EnableRISCVDeadRegisterElimination) addPass(createRISCVDeadRegisterDefinitionsPass()); @@ -424,8 +418,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() { bool RISCVPassConfig::addRegAssignAndRewriteOptimized() { addPass(createRVVRegAllocPass(true)); addPass(createVirtRegRewriter(false)); - if (EnableVSETVLIAfterRVVRegAlloc) - addPass(createRISCVInsertVSETVLIPass()); + addPass(createRISCVInsertVSETVLIPass()); if (TM->getOptLevel() != CodeGenOptLevel::None && EnableRISCVDeadRegisterElimination) addPass(createRISCVDeadRegisterDefinitionsPass()); @@ -575,15 +568,6 @@ void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVInsertReadWriteCSRPass()); addPass(createRISCVInsertWriteVXRMPass()); addPass(createRISCVLandingPadSetupPass()); - - // Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after - // register coalescing so needVSETVLIPHI doesn't need to look through COPYs. 
- if (!EnableVSETVLIAfterRVVRegAlloc) { - if (TM->getOptLevel() == CodeGenOptLevel::None) - insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID); - else - insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID); - } } void RISCVPassConfig::addFastRegAlloc() { diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll index 4f58ccb5188d31f..64c59769546fb74 100644 --- a/llvm/test/CodeGen/RISCV/rvv/remat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll @@ -1,53 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,POSTRA -; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-vsetvl-after-rvv-regalloc=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,PRERA +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s define void @vid(ptr %p) { -; POSTRA-LABEL: vid: -; POSTRA: # %bb.0: -; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; POSTRA-NEXT: vid.v v8 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vl8re64.v v16, (a0) -; POSTRA-NEXT: vl8re64.v v24, (a0) -; POSTRA-NEXT: vl8re64.v v0, (a0) -; POSTRA-NEXT: vl8re64.v v8, (a0) -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vs8r.v v0, (a0) -; POSTRA-NEXT: vs8r.v v24, (a0) -; POSTRA-NEXT: vs8r.v v16, (a0) -; POSTRA-NEXT: vid.v v8 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: ret -; -; PRERA-LABEL: vid: -; PRERA: # %bb.0: -; PRERA-NEXT: addi sp, sp, -16 -; PRERA-NEXT: .cfi_def_cfa_offset 16 -; PRERA-NEXT: csrr a1, vlenb -; PRERA-NEXT: slli a1, a1, 3 -; PRERA-NEXT: sub sp, sp, a1 -; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; PRERA-NEXT: vid.v v8 -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: addi a1, sp, 16 -; PRERA-NEXT: vs8r.v 
v16, (a1) # Unknown-size Folded Spill -; PRERA-NEXT: vl8re64.v v24, (a0) -; PRERA-NEXT: vl8re64.v v0, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v0, (a0) -; PRERA-NEXT: vs8r.v v24, (a0) -; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: csrr a0, vlenb -; PRERA-NEXT: slli a0, a0, 3 -; PRERA-NEXT: add sp, sp, a0 -; PRERA-NEXT: addi sp, sp, 16 -; PRERA-NEXT: ret +; CHECK-LABEL: vid: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vs8r.v v0, (a0) +; CHECK-NEXT: vs8r.v v24, (a0) +; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: ret %vid = call @llvm.riscv.vid.nxv8i64( poison, i64 -1) store volatile %vid, ptr %p @@ -111,51 +81,22 @@ define void @vid_passthru(ptr %p, %v) { } define void @vmv.v.i(ptr %p) { -; POSTRA-LABEL: vmv.v.i: -; POSTRA: # %bb.0: -; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; POSTRA-NEXT: vmv.v.i v8, 1 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vl8re64.v v16, (a0) -; POSTRA-NEXT: vl8re64.v v24, (a0) -; POSTRA-NEXT: vl8re64.v v0, (a0) -; POSTRA-NEXT: vl8re64.v v8, (a0) -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vs8r.v v0, (a0) -; POSTRA-NEXT: vs8r.v v24, (a0) -; POSTRA-NEXT: vs8r.v v16, (a0) -; POSTRA-NEXT: vmv.v.i v8, 1 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: ret -; -; PRERA-LABEL: vmv.v.i: -; PRERA: # %bb.0: -; PRERA-NEXT: addi sp, sp, -16 -; PRERA-NEXT: .cfi_def_cfa_offset 16 -; PRERA-NEXT: csrr a1, vlenb -; PRERA-NEXT: slli a1, a1, 3 -; PRERA-NEXT: sub sp, sp, a1 -; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 
16 + 8 * vlenb -; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; PRERA-NEXT: vmv.v.i v8, 1 -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: addi a1, sp, 16 -; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; PRERA-NEXT: vl8re64.v v24, (a0) -; PRERA-NEXT: vl8re64.v v0, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v0, (a0) -; PRERA-NEXT: vs8r.v v24, (a0) -; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: csrr a0, vlenb -; PRERA-NEXT: slli a0, a0, 3 -; PRERA-NEXT: add sp, sp, a0 -; PRERA-NEXT: addi sp, sp, 16 -; PRERA-NEXT: ret +; CHECK-LABEL: vmv.v.i: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 1 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vs8r.v v0, (a0) +; CHECK-NEXT: vs8r.v v24, (a0) +; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: vmv.v.i v8, 1 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: ret %vmv.v.i = call @llvm.riscv.vmv.v.x.nxv8i64( poison, i64 1, i64 -1) store volatile %vmv.v.i, ptr %p @@ -172,66 +113,35 @@ define void @vmv.v.i(ptr %p) { ret void } -; The live range of %x needs extended down to the use of vmv.v.x at the end of -; the block. 
define void @vmv.v.x_needs_extended(ptr %p, i64 %x) { -; POSTRA-LABEL: vmv.v.x_needs_extended: -; POSTRA: # %bb.0: -; POSTRA-NEXT: addi sp, sp, -16 -; POSTRA-NEXT: .cfi_def_cfa_offset 16 -; POSTRA-NEXT: csrr a2, vlenb -; POSTRA-NEXT: slli a2, a2, 3 -; POSTRA-NEXT: sub sp, sp, a2 -; POSTRA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; POSTRA-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; POSTRA-NEXT: vmv.v.x v8, a1 -; POSTRA-NEXT: addi a1, sp, 16 -; POSTRA-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vl8re64.v v16, (a0) -; POSTRA-NEXT: vl8re64.v v24, (a0) -; POSTRA-NEXT: vl8re64.v v0, (a0) -; POSTRA-NEXT: vl8re64.v v8, (a0) -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vs8r.v v0, (a0) -; POSTRA-NEXT: vs8r.v v24, (a0) -; POSTRA-NEXT: vs8r.v v16, (a0) -; POSTRA-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: csrr a0, vlenb -; POSTRA-NEXT: slli a0, a0, 3 -; POSTRA-NEXT: add sp, sp, a0 -; POSTRA-NEXT: addi sp, sp, 16 -; POSTRA-NEXT: ret -; -; PRERA-LABEL: vmv.v.x_needs_extended: -; PRERA: # %bb.0: -; PRERA-NEXT: addi sp, sp, -16 -; PRERA-NEXT: .cfi_def_cfa_offset 16 -; PRERA-NEXT: csrr a2, vlenb -; PRERA-NEXT: slli a2, a2, 3 -; PRERA-NEXT: sub sp, sp, a2 -; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; PRERA-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; PRERA-NEXT: vmv.v.x v8, a1 -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: addi a1, sp, 16 -; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; PRERA-NEXT: vl8re64.v v24, (a0) -; PRERA-NEXT: vl8re64.v v0, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v0, (a0) -; PRERA-NEXT: vs8r.v v24, (a0) -; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; 
PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: csrr a0, vlenb -; PRERA-NEXT: slli a0, a0, 3 -; PRERA-NEXT: add sp, sp, a0 -; PRERA-NEXT: addi sp, sp, 16 -; PRERA-NEXT: ret +; CHECK-LABEL: vmv.v.x_needs_extended: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vs8r.v v0, (a0) +; CHECK-NEXT: vs8r.v v24, (a0) +; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %vmv.v.x = call @llvm.riscv.vmv.v.x.nxv8i64( poison, i64 %x, i64 -1) store volatile %vmv.v.x, ptr %p @@ -249,53 +159,23 @@ define void @vmv.v.x_needs_extended(ptr %p, i64 %x) { } define void @vmv.v.x_live(ptr %p, i64 %x) { -; POSTRA-LABEL: vmv.v.x_live: -; POSTRA: # %bb.0: -; POSTRA-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; POSTRA-NEXT: vmv.v.x v8, a1 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vl8re64.v v16, (a0) -; POSTRA-NEXT: vl8re64.v v24, (a0) -; POSTRA-NEXT: vl8re64.v v0, (a0) -; POSTRA-NEXT: vl8re64.v v8, (a0) -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vs8r.v v0, (a0) -; POSTRA-NEXT: vs8r.v v24, (a0) -; POSTRA-NEXT: vs8r.v v16, (a0) -; POSTRA-NEXT: vmv.v.x v8, a1 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: sd a1, 
0(a0) -; POSTRA-NEXT: ret -; -; PRERA-LABEL: vmv.v.x_live: -; PRERA: # %bb.0: -; PRERA-NEXT: addi sp, sp, -16 -; PRERA-NEXT: .cfi_def_cfa_offset 16 -; PRERA-NEXT: csrr a2, vlenb -; PRERA-NEXT: slli a2, a2, 3 -; PRERA-NEXT: sub sp, sp, a2 -; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; PRERA-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; PRERA-NEXT: vmv.v.x v8, a1 -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: addi a2, sp, 16 -; PRERA-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; PRERA-NEXT: vl8re64.v v24, (a0) -; PRERA-NEXT: vl8re64.v v0, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v0, (a0) -; PRERA-NEXT: vs8r.v v24, (a0) -; PRERA-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: sd a1, 0(a0) -; PRERA-NEXT: csrr a0, vlenb -; PRERA-NEXT: slli a0, a0, 3 -; PRERA-NEXT: add sp, sp, a0 -; PRERA-NEXT: addi sp, sp, 16 -; PRERA-NEXT: ret +; CHECK-LABEL: vmv.v.x_live: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vs8r.v v0, (a0) +; CHECK-NEXT: vs8r.v v24, (a0) +; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret %vmv.v.x = call @llvm.riscv.vmv.v.x.nxv8i64( poison, i64 %x, i64 -1) store volatile %vmv.v.x, ptr %p @@ -314,53 +194,23 @@ define void @vmv.v.x_live(ptr %p, i64 %x) { } define void @vfmv.v.f(ptr %p, double %x) { -; POSTRA-LABEL: vfmv.v.f: -; POSTRA: # %bb.0: -; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; POSTRA-NEXT: vfmv.v.f v8, fa0 -; POSTRA-NEXT: vs8r.v 
v8, (a0) -; POSTRA-NEXT: vl8re64.v v16, (a0) -; POSTRA-NEXT: vl8re64.v v24, (a0) -; POSTRA-NEXT: vl8re64.v v0, (a0) -; POSTRA-NEXT: vl8re64.v v8, (a0) -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vs8r.v v0, (a0) -; POSTRA-NEXT: vs8r.v v24, (a0) -; POSTRA-NEXT: vs8r.v v16, (a0) -; POSTRA-NEXT: vfmv.v.f v8, fa0 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: fsd fa0, 0(a0) -; POSTRA-NEXT: ret -; -; PRERA-LABEL: vfmv.v.f: -; PRERA: # %bb.0: -; PRERA-NEXT: addi sp, sp, -16 -; PRERA-NEXT: .cfi_def_cfa_offset 16 -; PRERA-NEXT: csrr a1, vlenb -; PRERA-NEXT: slli a1, a1, 3 -; PRERA-NEXT: sub sp, sp, a1 -; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; PRERA-NEXT: vfmv.v.f v8, fa0 -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: addi a1, sp, 16 -; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; PRERA-NEXT: vl8re64.v v24, (a0) -; PRERA-NEXT: vl8re64.v v0, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v0, (a0) -; PRERA-NEXT: vs8r.v v24, (a0) -; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: fsd fa0, 0(a0) -; PRERA-NEXT: csrr a0, vlenb -; PRERA-NEXT: slli a0, a0, 3 -; PRERA-NEXT: add sp, sp, a0 -; PRERA-NEXT: addi sp, sp, 16 -; PRERA-NEXT: ret +; CHECK-LABEL: vfmv.v.f: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vs8r.v v0, (a0) +; CHECK-NEXT: vs8r.v v24, (a0) +; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: fsd fa0, 0(a0) +; 
CHECK-NEXT: ret %vfmv.v.f = call @llvm.riscv.vfmv.v.f.nxv8f64( poison, double %x, i64 -1) store volatile %vfmv.v.f, ptr %p @@ -379,53 +229,23 @@ define void @vfmv.v.f(ptr %p, double %x) { } define void @vmv.s.x(ptr %p, i64 %x) { -; POSTRA-LABEL: vmv.s.x: -; POSTRA: # %bb.0: -; POSTRA-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; POSTRA-NEXT: vmv.s.x v8, a1 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vl8re64.v v16, (a0) -; POSTRA-NEXT: vl8re64.v v24, (a0) -; POSTRA-NEXT: vl8re64.v v0, (a0) -; POSTRA-NEXT: vl8re64.v v8, (a0) -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vs8r.v v0, (a0) -; POSTRA-NEXT: vs8r.v v24, (a0) -; POSTRA-NEXT: vs8r.v v16, (a0) -; POSTRA-NEXT: vmv.s.x v8, a1 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: sd a1, 0(a0) -; POSTRA-NEXT: ret -; -; PRERA-LABEL: vmv.s.x: -; PRERA: # %bb.0: -; PRERA-NEXT: addi sp, sp, -16 -; PRERA-NEXT: .cfi_def_cfa_offset 16 -; PRERA-NEXT: csrr a2, vlenb -; PRERA-NEXT: slli a2, a2, 3 -; PRERA-NEXT: sub sp, sp, a2 -; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; PRERA-NEXT: vsetvli a2, zero, e64, m1, ta, ma -; PRERA-NEXT: vmv.s.x v8, a1 -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: addi a2, sp, 16 -; PRERA-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; PRERA-NEXT: vl8re64.v v24, (a0) -; PRERA-NEXT: vl8re64.v v0, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v0, (a0) -; PRERA-NEXT: vs8r.v v24, (a0) -; PRERA-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: sd a1, 0(a0) -; PRERA-NEXT: csrr a0, vlenb -; PRERA-NEXT: slli a0, a0, 3 -; PRERA-NEXT: add sp, sp, a0 -; PRERA-NEXT: addi sp, sp, 16 -; PRERA-NEXT: ret +; CHECK-LABEL: vmv.s.x: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vs8r.v v8, (a0) 
+; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vs8r.v v0, (a0) +; CHECK-NEXT: vs8r.v v24, (a0) +; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret %vmv.s.x = call @llvm.riscv.vmv.s.x.nxv8i64( poison, i64 %x, i64 -1) store volatile %vmv.s.x, ptr %p @@ -444,53 +264,23 @@ define void @vmv.s.x(ptr %p, i64 %x) { } define void @vfmv.s.f(ptr %p, double %x) { -; POSTRA-LABEL: vfmv.s.f: -; POSTRA: # %bb.0: -; POSTRA-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; POSTRA-NEXT: vfmv.s.f v8, fa0 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vl8re64.v v16, (a0) -; POSTRA-NEXT: vl8re64.v v24, (a0) -; POSTRA-NEXT: vl8re64.v v0, (a0) -; POSTRA-NEXT: vl8re64.v v8, (a0) -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: vs8r.v v0, (a0) -; POSTRA-NEXT: vs8r.v v24, (a0) -; POSTRA-NEXT: vs8r.v v16, (a0) -; POSTRA-NEXT: vfmv.s.f v8, fa0 -; POSTRA-NEXT: vs8r.v v8, (a0) -; POSTRA-NEXT: fsd fa0, 0(a0) -; POSTRA-NEXT: ret -; -; PRERA-LABEL: vfmv.s.f: -; PRERA: # %bb.0: -; PRERA-NEXT: addi sp, sp, -16 -; PRERA-NEXT: .cfi_def_cfa_offset 16 -; PRERA-NEXT: csrr a1, vlenb -; PRERA-NEXT: slli a1, a1, 3 -; PRERA-NEXT: sub sp, sp, a1 -; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; PRERA-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; PRERA-NEXT: vfmv.s.f v8, fa0 -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: addi a1, sp, 16 -; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; PRERA-NEXT: vl8re64.v v24, (a0) -; PRERA-NEXT: vl8re64.v v0, (a0) -; PRERA-NEXT: vl8re64.v v16, (a0) -; PRERA-NEXT: vs8r.v v16, (a0) -; PRERA-NEXT: vs8r.v v0, (a0) -; PRERA-NEXT: vs8r.v v24, (a0) -; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; PRERA-NEXT: vs8r.v v16, 
(a0) -; PRERA-NEXT: vs8r.v v8, (a0) -; PRERA-NEXT: fsd fa0, 0(a0) -; PRERA-NEXT: csrr a0, vlenb -; PRERA-NEXT: slli a0, a0, 3 -; PRERA-NEXT: add sp, sp, a0 -; PRERA-NEXT: addi sp, sp, 16 -; PRERA-NEXT: ret +; CHECK-LABEL: vfmv.s.f: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vl8re64.v v16, (a0) +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v0, (a0) +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vs8r.v v0, (a0) +; CHECK-NEXT: vs8r.v v24, (a0) +; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: fsd fa0, 0(a0) +; CHECK-NEXT: ret %vfmv.s.f = call @llvm.riscv.vfmv.s.f.nxv8f64( poison, double %x, i64 -1) store volatile %vfmv.s.f, ptr %p From d6d4569dd90261bc8cdb3d1c3c2204cde1fe566f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 28 Oct 2024 11:45:31 +0000 Subject: [PATCH 158/425] Fix MSVC "signed/unsigned mismatch" warnings. NFC. --- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 911eec48bcb2fd4..a57c95d5b966721 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19055,7 +19055,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, Args.push_back(llvm::PoisonValue::get(IntTy)); for (unsigned I = 0; I != E->getNumArgs(); ++I) { llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E); - if (I <= !InsertOld && Size < 32) { + if (I <= (InsertOld ? 
0u : 1u) && Size < 32) { if (!DataTy->isIntegerTy()) V = Builder.CreateBitCast( V, llvm::IntegerType::get(Builder.getContext(), Size)); From 056cf936a730a72db7ad9da8b475814c6bbe03b7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 28 Oct 2024 11:52:44 +0000 Subject: [PATCH 159/425] [DAG] Fold (and X, (bswap/bitreverse (not Y))) -> (and X, (not (bswap/bitreverse Y))) (#112547) On ANDNOT capable targets we can always do this profitably, without ANDNOT we only attempt this if we don't introduce an additional NOT Fixes #112425 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 + llvm/test/CodeGen/X86/andnot-patterns.ll | 683 +++++++++++++----- 2 files changed, 505 insertions(+), 189 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ad2d2ede302af84..b800204d917503f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7353,6 +7353,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) return R; + // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y))) + // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y))) + SDValue X, Y, NotY; + for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE}) + if (sd_match(N, + m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) && + sd_match(NotY, m_Not(m_Value(Y))) && + (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse())) + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT)); + // Masking the negated extension of a boolean is just the zero-extended // boolean: // and (sub 0, zext(bool X)), 1 --> zext(bool X) diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll index 101e4ed008f7b6c..1df29f0b12d1b61 100644 --- a/llvm/test/CodeGen/X86/andnot-patterns.ll +++ b/llvm/test/CodeGen/X86/andnot-patterns.ll @@ -7,6 +7,7 @@ ; TODO - PR112425 - attempt to 
reconstruct andnot patterns through bitwise-agnostic operations declare void @use_i64(i64) +declare void @use_i32(i32) ; ; Fold (and X, (rotl (not Y), Z))) -> (and X, (not (rotl Y, Z))) @@ -132,8 +133,8 @@ define i8 @andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ret i8 %and } -define i64 @andnot_rotl_i64_multiuse(i64 %a0, i64 %a1, i64 %a2) nounwind { -; X86-LABEL: andnot_rotl_i64_multiuse: +define i64 @andnot_rotl_i64_multiuse_rot(i64 %a0, i64 %a1, i64 %a2) nounwind { +; X86-LABEL: andnot_rotl_i64_multiuse_rot: ; X86: # %bb.0: ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi @@ -171,7 +172,7 @@ define i64 @andnot_rotl_i64_multiuse(i64 %a0, i64 %a1, i64 %a2) nounwind { ; X86-NEXT: popl %ebx ; X86-NEXT: retl ; -; X64-LABEL: andnot_rotl_i64_multiuse: +; X64-LABEL: andnot_rotl_i64_multiuse_rot: ; X64: # %bb.0: ; X64-NEXT: pushq %rbx ; X64-NEXT: movq %rdx, %rcx @@ -316,30 +317,84 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ret i8 %and } -; -; Fold (and X, (bswap (not Y)))) -> (and X, (not (bswap Y))) -; - -define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind { -; X86-LABEL: andnot_bswap_i64: +define i32 @andnot_rotr_i32_multiuse_not(i32 %a0, i32 %a1, i32 %a2) nounwind { +; X86-LABEL: andnot_rotr_i32_multiuse_not: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl %esi +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: notl %eax -; X86-NEXT: notl %edx -; X86-NEXT: bswapl %edx -; X86-NEXT: bswapl %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: rorl %cl, %esi +; X86-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NEXT: pushl %eax +; X86-NEXT: calll use_i32@PLT +; X86-NEXT: addl $4, %esp +; X86-NEXT: movl %esi, %eax +; X86-NEXT: popl %esi ; X86-NEXT: retl ; -; X64-LABEL: andnot_bswap_i64: +; X64-LABEL: andnot_rotr_i32_multiuse_not: ; X64: # %bb.0: -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: notq %rax -; 
X64-NEXT: bswapq %rax -; X64-NEXT: andq %rdi, %rax +; X64-NEXT: pushq %rbx +; X64-NEXT: movl %edx, %ecx +; X64-NEXT: notl %esi +; X64-NEXT: movl %esi, %ebx +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: rorl %cl, %ebx +; X64-NEXT: andl %edi, %ebx +; X64-NEXT: movl %esi, %edi +; X64-NEXT: callq use_i32@PLT +; X64-NEXT: movl %ebx, %eax +; X64-NEXT: popq %rbx ; X64-NEXT: retq + %not = xor i32 %a1, -1 + %rot = tail call i32 @llvm.fshr.i32(i32 %not, i32 %not, i32 %a2) + %and = and i32 %rot, %a0 + call void @use_i32(i32 %not) + ret i32 %and +} + +; +; Fold (and X, (bswap (not Y)))) -> (and X, (not (bswap Y))) +; + +define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind { +; X86-NOBMI-LABEL: andnot_bswap_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: bswapl %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: bswapl %edx +; X86-NOBMI-NEXT: notl %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: andnot_bswap_i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: bswapl %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: bswapl %ecx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_bswap_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rax +; X64-NOBMI-NEXT: bswapq %rax +; X64-NOBMI-NEXT: notq %rax +; X64-NOBMI-NEXT: andq %rdi, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_bswap_i64: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: bswapq %rsi +; X64-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-BMI-NEXT: retq %not = xor i64 %a1, -1 %bswap = tail call i64 @llvm.bswap.i64(i64 %not) %and = and i64 %bswap, %a0 @@ -347,21 +402,34 @@ define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind { } define i32 
@andnot_bswap_i32(i32 %a0, i32 %a1) nounwind { -; X86-LABEL: andnot_bswap_i32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax -; X86-NEXT: bswapl %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_bswap_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: bswapl %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_bswap_i32: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: bswapl %eax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_bswap_i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: bswapl %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_bswap_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: bswapl %eax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_bswap_i32: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: bswapl %esi +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %bswap = tail call i32 @llvm.bswap.i32(i32 %not) %and = and i32 %bswap, %a0 @@ -371,101 +439,273 @@ define i32 @andnot_bswap_i32(i32 %a0, i32 %a1) nounwind { define i16 @andnot_bswap_i16(i16 %a0, i16 %a1) nounwind { ; X86-LABEL: andnot_bswap_i16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolw $8, %ax +; X86-NEXT: notl %eax ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; -; X64-LABEL: andnot_bswap_i16: -; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: rolw $8, %ax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; 
X64-NEXT: retq +; X64-NOBMI-LABEL: andnot_bswap_i16: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: rolw $8, %ax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_bswap_i16: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: rolw $8, %si +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI-NEXT: retq %not = xor i16 %a1, -1 %bswap = tail call i16 @llvm.bswap.i16(i16 %not) %and = and i16 %bswap, %a0 ret i16 %and } -; -; Fold (and X, (bitreverse (not Y)))) -> (and X, (not (bitreverse Y))) -; - -define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { -; X86-LABEL: andnot_bitreverse_i64: +define i32 @andnot_bswap_i32_multiuse_bswap(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: andnot_bswap_i32_multiuse_bswap: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: notl %eax -; X86-NEXT: notl %ecx -; X86-NEXT: bswapl %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F -; X86-NEXT: shll $4, %edx -; X86-NEXT: shrl $4, %ecx -; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F -; X86-NEXT: orl %edx, %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 -; X86-NEXT: shrl $2, %ecx -; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-NEXT: leal (%ecx,%edx,4), %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 -; X86-NEXT: shrl %ecx -; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; X86-NEXT: leal (%ecx,%edx,2), %edx ; X86-NEXT: bswapl %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F -; X86-NEXT: shll $4, %ecx -; X86-NEXT: shrl $4, %eax -; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: movl %eax, %ecx -; 
X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-NEXT: shrl $2, %eax -; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-NEXT: leal (%eax,%ecx,4), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; X86-NEXT: shrl %eax -; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X86-NEXT: leal (%eax,%ecx,2), %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: andl %eax, %esi +; X86-NEXT: pushl %eax +; X86-NEXT: calll use_i32@PLT +; X86-NEXT: addl $4, %esp +; X86-NEXT: movl %esi, %eax +; X86-NEXT: popl %esi ; X86-NEXT: retl ; -; X64-LABEL: andnot_bitreverse_i64: +; X64-LABEL: andnot_bswap_i32_multiuse_bswap: ; X64: # %bb.0: -; X64-NEXT: notq %rsi -; X64-NEXT: bswapq %rsi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: shrq $4, %rax -; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F -; X64-NEXT: andq %rcx, %rax -; X64-NEXT: andq %rcx, %rsi -; X64-NEXT: shlq $4, %rsi -; X64-NEXT: orq %rax, %rsi -; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 -; X64-NEXT: movq %rsi, %rcx -; X64-NEXT: andq %rax, %rcx -; X64-NEXT: shrq $2, %rsi -; X64-NEXT: andq %rax, %rsi -; X64-NEXT: leaq (%rsi,%rcx,4), %rax -; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 -; X64-NEXT: movq %rax, %rdx -; X64-NEXT: andq %rcx, %rdx -; X64-NEXT: shrq %rax -; X64-NEXT: andq %rcx, %rax -; X64-NEXT: leaq (%rax,%rdx,2), %rax -; X64-NEXT: andq %rdi, %rax +; X64-NEXT: pushq %rbx +; X64-NEXT: movl %edi, %ebx +; X64-NEXT: notl %esi +; X64-NEXT: bswapl %esi +; X64-NEXT: andl %esi, %ebx +; X64-NEXT: movl %esi, %edi +; X64-NEXT: callq use_i32@PLT +; X64-NEXT: movl %ebx, %eax +; X64-NEXT: popq %rbx ; X64-NEXT: retq + %not = xor i32 %a1, -1 + %bswap = tail call i32 @llvm.bswap.i32(i32 %not) + %and = and i32 %bswap, %a0 + call void @use_i32(i32 %bswap) + ret i32 %and +} + +define i32 
@andnot_bswap_i32_multiuse_not(i32 %a0, i32 %a1) nounwind { +; X86-NOBMI-LABEL: andnot_bswap_i32_multiuse_not: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: movl %eax, %esi +; X86-NOBMI-NEXT: bswapl %esi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: pushl %eax +; X86-NOBMI-NEXT: calll use_i32@PLT +; X86-NOBMI-NEXT: addl $4, %esp +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: andnot_bswap_i32_multiuse_not: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl %eax, %ecx +; X86-BMI-NEXT: notl %ecx +; X86-BMI-NEXT: bswapl %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %esi +; X86-BMI-NEXT: pushl %ecx +; X86-BMI-NEXT: calll use_i32@PLT +; X86-BMI-NEXT: addl $4, %esp +; X86-BMI-NEXT: movl %esi, %eax +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_bswap_i32_multiuse_not: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: pushq %rbx +; X64-NOBMI-NEXT: notl %esi +; X64-NOBMI-NEXT: movl %esi, %ebx +; X64-NOBMI-NEXT: bswapl %ebx +; X64-NOBMI-NEXT: andl %edi, %ebx +; X64-NOBMI-NEXT: movl %esi, %edi +; X64-NOBMI-NEXT: callq use_i32@PLT +; X64-NOBMI-NEXT: movl %ebx, %eax +; X64-NOBMI-NEXT: popq %rbx +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_bswap_i32_multiuse_not: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: pushq %rbx +; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: notl %eax +; X64-BMI-NEXT: bswapl %esi +; X64-BMI-NEXT: andnl %edi, %esi, %ebx +; X64-BMI-NEXT: movl %eax, %edi +; X64-BMI-NEXT: callq use_i32@PLT +; X64-BMI-NEXT: movl %ebx, %eax +; X64-BMI-NEXT: popq %rbx +; X64-BMI-NEXT: retq + %not = xor i32 %a1, -1 + %bswap = tail call i32 @llvm.bswap.i32(i32 %not) + %and = and i32 %bswap, %a0 + call void @use_i32(i32 %not) + ret i32 %and +} + +; +; Fold (and X, (bitreverse (not Y)))) -> (and X, (not 
(bitreverse Y))) +; + +define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { +; X86-NOBMI-LABEL: andnot_bitreverse_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: bswapl %eax +; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOBMI-NEXT: shll $4, %edx +; X86-NOBMI-NEXT: shrl $4, %eax +; X86-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X86-NOBMI-NEXT: orl %edx, %eax +; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NOBMI-NEXT: shrl $2, %eax +; X86-NOBMI-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-NOBMI-NEXT: leal (%eax,%edx,4), %eax +; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOBMI-NEXT: shrl %eax +; X86-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X86-NOBMI-NEXT: leal (%eax,%edx,2), %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: bswapl %ecx +; X86-NOBMI-NEXT: movl %ecx, %edx +; X86-NOBMI-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-NOBMI-NEXT: shll $4, %edx +; X86-NOBMI-NEXT: shrl $4, %ecx +; X86-NOBMI-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NOBMI-NEXT: orl %edx, %ecx +; X86-NOBMI-NEXT: movl %ecx, %edx +; X86-NOBMI-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-NOBMI-NEXT: shrl $2, %ecx +; X86-NOBMI-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NOBMI-NEXT: leal (%ecx,%edx,4), %ecx +; X86-NOBMI-NEXT: movl %ecx, %edx +; X86-NOBMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-NOBMI-NEXT: shrl %ecx +; X86-NOBMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NOBMI-NEXT: leal (%ecx,%edx,2), %edx +; X86-NOBMI-NEXT: notl %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: andnot_bitreverse_i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: 
movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: bswapl %eax +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-BMI-NEXT: shll $4, %edx +; X86-BMI-NEXT: shrl $4, %eax +; X86-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X86-BMI-NEXT: orl %edx, %eax +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-BMI-NEXT: shrl $2, %eax +; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-BMI-NEXT: leal (%eax,%edx,4), %eax +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-BMI-NEXT: shrl %eax +; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X86-BMI-NEXT: leal (%eax,%edx,2), %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: bswapl %ecx +; X86-BMI-NEXT: movl %ecx, %edx +; X86-BMI-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F +; X86-BMI-NEXT: shll $4, %edx +; X86-BMI-NEXT: shrl $4, %ecx +; X86-BMI-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-BMI-NEXT: orl %edx, %ecx +; X86-BMI-NEXT: movl %ecx, %edx +; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333 +; X86-BMI-NEXT: shrl $2, %ecx +; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-BMI-NEXT: leal (%ecx,%edx,4), %ecx +; X86-BMI-NEXT: movl %ecx, %edx +; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-BMI-NEXT: shrl %ecx +; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-BMI-NEXT: leal (%ecx,%edx,2), %ecx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_bitreverse_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: bswapq %rsi +; X64-NOBMI-NEXT: movq %rsi, %rax +; X64-NOBMI-NEXT: shrq $4, %rax +; X64-NOBMI-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F +; X64-NOBMI-NEXT: andq %rcx, %rax +; X64-NOBMI-NEXT: andq %rcx, %rsi +; X64-NOBMI-NEXT: shlq $4, %rsi +; X64-NOBMI-NEXT: orq 
%rax, %rsi +; X64-NOBMI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-NOBMI-NEXT: movq %rsi, %rcx +; X64-NOBMI-NEXT: andq %rax, %rcx +; X64-NOBMI-NEXT: shrq $2, %rsi +; X64-NOBMI-NEXT: andq %rax, %rsi +; X64-NOBMI-NEXT: leaq (%rsi,%rcx,4), %rax +; X64-NOBMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-NOBMI-NEXT: movq %rax, %rdx +; X64-NOBMI-NEXT: andq %rcx, %rdx +; X64-NOBMI-NEXT: shrq %rax +; X64-NOBMI-NEXT: andq %rcx, %rax +; X64-NOBMI-NEXT: leaq (%rax,%rdx,2), %rax +; X64-NOBMI-NEXT: notq %rax +; X64-NOBMI-NEXT: andq %rdi, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_bitreverse_i64: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: bswapq %rsi +; X64-BMI-NEXT: movq %rsi, %rax +; X64-BMI-NEXT: shrq $4, %rax +; X64-BMI-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F +; X64-BMI-NEXT: andq %rcx, %rax +; X64-BMI-NEXT: andq %rcx, %rsi +; X64-BMI-NEXT: shlq $4, %rsi +; X64-BMI-NEXT: orq %rax, %rsi +; X64-BMI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-BMI-NEXT: movq %rsi, %rcx +; X64-BMI-NEXT: andq %rax, %rcx +; X64-BMI-NEXT: shrq $2, %rsi +; X64-BMI-NEXT: andq %rax, %rsi +; X64-BMI-NEXT: leaq (%rsi,%rcx,4), %rax +; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-BMI-NEXT: movq %rax, %rdx +; X64-BMI-NEXT: andq %rcx, %rdx +; X64-BMI-NEXT: shrq %rax +; X64-BMI-NEXT: andq %rcx, %rax +; X64-BMI-NEXT: leaq (%rax,%rdx,2), %rax +; X64-BMI-NEXT: andnq %rdi, %rax, %rax +; X64-BMI-NEXT: retq %not = xor i64 %a1, -1 %bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not) %and = and i64 %bitrev, %a0 @@ -473,53 +713,99 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { } define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind { -; X86-LABEL: andnot_bitreverse_i32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax -; X86-NEXT: bswapl %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl 
$252645135, %ecx # imm = 0xF0F0F0F -; X86-NEXT: shll $4, %ecx -; X86-NEXT: shrl $4, %eax -; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-NEXT: shrl $2, %eax -; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-NEXT: leal (%eax,%ecx,4), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; X86-NEXT: shrl %eax -; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X86-NEXT: leal (%eax,%ecx,2), %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_bitreverse_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: bswapl %eax +; X86-NOBMI-NEXT: movl %eax, %ecx +; X86-NOBMI-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-NOBMI-NEXT: shll $4, %ecx +; X86-NOBMI-NEXT: shrl $4, %eax +; X86-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X86-NOBMI-NEXT: orl %ecx, %eax +; X86-NOBMI-NEXT: movl %eax, %ecx +; X86-NOBMI-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-NOBMI-NEXT: shrl $2, %eax +; X86-NOBMI-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-NOBMI-NEXT: leal (%eax,%ecx,4), %eax +; X86-NOBMI-NEXT: movl %eax, %ecx +; X86-NOBMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-NOBMI-NEXT: shrl %eax +; X86-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X86-NOBMI-NEXT: leal (%eax,%ecx,2), %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_bitreverse_i32: -; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: notl %esi -; X64-NEXT: bswapl %esi -; X64-NEXT: movl %esi, %eax -; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F -; X64-NEXT: shll $4, %eax -; X64-NEXT: shrl $4, %esi -; X64-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F -; X64-NEXT: orl %eax, %esi -; X64-NEXT: movl %esi, %eax -; 
X64-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X64-NEXT: shrl $2, %esi -; X64-NEXT: andl $858993459, %esi # imm = 0x33333333 -; X64-NEXT: leal (%rsi,%rax,4), %eax -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; X64-NEXT: shrl %eax -; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X64-NEXT: leal (%rax,%rcx,2), %eax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_bitreverse_i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: bswapl %eax +; X86-BMI-NEXT: movl %eax, %ecx +; X86-BMI-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X86-BMI-NEXT: shll $4, %ecx +; X86-BMI-NEXT: shrl $4, %eax +; X86-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X86-BMI-NEXT: orl %ecx, %eax +; X86-BMI-NEXT: movl %eax, %ecx +; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333 +; X86-BMI-NEXT: shrl $2, %eax +; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X86-BMI-NEXT: leal (%eax,%ecx,4), %eax +; X86-BMI-NEXT: movl %eax, %ecx +; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-BMI-NEXT: shrl %eax +; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X86-BMI-NEXT: leal (%eax,%ecx,2), %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_bitreverse_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NOBMI-NEXT: bswapl %esi +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X64-NOBMI-NEXT: shll $4, %eax +; X64-NOBMI-NEXT: shrl $4, %esi +; X64-NOBMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; X64-NOBMI-NEXT: orl %eax, %esi +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X64-NOBMI-NEXT: shrl $2, %esi +; X64-NOBMI-NEXT: andl $858993459, %esi # imm = 0x33333333 +; X64-NOBMI-NEXT: leal (%rsi,%rax,4), %eax +; X64-NOBMI-NEXT: movl %eax, %ecx +; 
X64-NOBMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X64-NOBMI-NEXT: shrl %eax +; X64-NOBMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X64-NOBMI-NEXT: leal (%rax,%rcx,2), %eax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_bitreverse_i32: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI-NEXT: bswapl %esi +; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X64-BMI-NEXT: shll $4, %eax +; X64-BMI-NEXT: shrl $4, %esi +; X64-BMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F +; X64-BMI-NEXT: orl %eax, %esi +; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 +; X64-BMI-NEXT: shrl $2, %esi +; X64-BMI-NEXT: andl $858993459, %esi # imm = 0x33333333 +; X64-BMI-NEXT: leal (%rsi,%rax,4), %eax +; X64-BMI-NEXT: movl %eax, %ecx +; X64-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X64-BMI-NEXT: shrl %eax +; X64-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax +; X64-BMI-NEXT: andnl %edi, %eax, %eax +; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not) %and = and i32 %bitrev, %a0 @@ -529,8 +815,7 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind { define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind { ; X86-LABEL: andnot_bitreverse_i16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolw $8, %ax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $3855, %ecx # imm = 0xF0F @@ -548,34 +833,59 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind { ; X86-NEXT: shrl %eax ; X86-NEXT: andl $21845, %eax # imm = 0x5555 ; X86-NEXT: leal (%eax,%ecx,2), %eax +; X86-NEXT: notl %eax ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl 
; -; X64-LABEL: andnot_bitreverse_i16: -; X64: # %bb.0: -; X64-NEXT: # kill: def $esi killed $esi def $rsi -; X64-NEXT: notl %esi -; X64-NEXT: rolw $8, %si -; X64-NEXT: movl %esi, %eax -; X64-NEXT: andl $3855, %eax # imm = 0xF0F -; X64-NEXT: shll $4, %eax -; X64-NEXT: shrl $4, %esi -; X64-NEXT: andl $3855, %esi # imm = 0xF0F -; X64-NEXT: orl %eax, %esi -; X64-NEXT: movl %esi, %eax -; X64-NEXT: andl $13107, %eax # imm = 0x3333 -; X64-NEXT: shrl $2, %esi -; X64-NEXT: andl $13107, %esi # imm = 0x3333 -; X64-NEXT: leal (%rsi,%rax,4), %eax -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: andl $21845, %ecx # imm = 0x5555 -; X64-NEXT: shrl %eax -; X64-NEXT: andl $21845, %eax # imm = 0x5555 -; X64-NEXT: leal (%rax,%rcx,2), %eax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: retq +; X64-NOBMI-LABEL: andnot_bitreverse_i16: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: # kill: def $esi killed $esi def $rsi +; X64-NOBMI-NEXT: rolw $8, %si +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: andl $3855, %eax # imm = 0xF0F +; X64-NOBMI-NEXT: shll $4, %eax +; X64-NOBMI-NEXT: shrl $4, %esi +; X64-NOBMI-NEXT: andl $3855, %esi # imm = 0xF0F +; X64-NOBMI-NEXT: orl %eax, %esi +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: andl $13107, %eax # imm = 0x3333 +; X64-NOBMI-NEXT: shrl $2, %esi +; X64-NOBMI-NEXT: andl $13107, %esi # imm = 0x3333 +; X64-NOBMI-NEXT: leal (%rsi,%rax,4), %eax +; X64-NOBMI-NEXT: movl %eax, %ecx +; X64-NOBMI-NEXT: andl $21845, %ecx # imm = 0x5555 +; X64-NOBMI-NEXT: shrl %eax +; X64-NOBMI-NEXT: andl $21845, %eax # imm = 0x5555 +; X64-NOBMI-NEXT: leal (%rax,%rcx,2), %eax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_bitreverse_i16: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI-NEXT: rolw $8, %si +; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: andl $3855, 
%eax # imm = 0xF0F +; X64-BMI-NEXT: shll $4, %eax +; X64-BMI-NEXT: shrl $4, %esi +; X64-BMI-NEXT: andl $3855, %esi # imm = 0xF0F +; X64-BMI-NEXT: orl %eax, %esi +; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: andl $13107, %eax # imm = 0x3333 +; X64-BMI-NEXT: shrl $2, %esi +; X64-BMI-NEXT: andl $13107, %esi # imm = 0x3333 +; X64-BMI-NEXT: leal (%rsi,%rax,4), %eax +; X64-BMI-NEXT: movl %eax, %ecx +; X64-BMI-NEXT: andl $21845, %ecx # imm = 0x5555 +; X64-BMI-NEXT: shrl %eax +; X64-BMI-NEXT: andl $21845, %eax # imm = 0x5555 +; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax +; X64-BMI-NEXT: andnl %edi, %eax, %eax +; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI-NEXT: retq %not = xor i16 %a1, -1 %bitrev = tail call i16 @llvm.bitreverse.i16(i16 %not) %and = and i16 %bitrev, %a0 @@ -586,7 +896,6 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind { ; X86-LABEL: andnot_bitreverse_i8: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notb %al ; X86-NEXT: rolb $4, %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andb $51, %cl @@ -600,12 +909,12 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind { ; X86-NEXT: shrb %al ; X86-NEXT: andb $85, %al ; X86-NEXT: orb %cl, %al +; X86-NEXT: notb %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; ; X64-LABEL: andnot_bitreverse_i8: ; X64: # %bb.0: -; X64-NEXT: notb %sil ; X64-NEXT: rolb $4, %sil ; X64-NEXT: movl %esi, %eax ; X64-NEXT: andb $51, %al @@ -619,6 +928,7 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind { ; X64-NEXT: shrb %al ; X64-NEXT: andb $85, %al ; X64-NEXT: orb %cl, %al +; X64-NEXT: notb %al ; X64-NEXT: andb %dil, %al ; X64-NEXT: retq %not = xor i8 %a1, -1 @@ -626,8 +936,3 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind { %and = and i8 %bitrev, %a0 ret i8 %and } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; X64-BMI: {{.*}} -; X64-NOBMI: {{.*}} -; X86-BMI: {{.*}} -; X86-NOBMI: {{.*}} From 75c75fc16e8a202668cfa71404724117b22336b9 Mon Sep 17 00:00:00 2001 From: dong-miao <601183878@qq.com> Date: Mon, 28 Oct 2024 04:54:51 -0700 Subject: [PATCH 160/425] [RISCV]Add svvptc extension (#113882) --- clang/test/Driver/print-supported-extensions-riscv.c | 1 + clang/test/Preprocessor/riscv-target-features.c | 9 +++++++++ llvm/docs/RISCVUsage.rst | 1 + llvm/docs/ReleaseNotes.md | 2 +- llvm/lib/Target/RISCV/RISCVFeatures.td | 4 ++++ llvm/test/CodeGen/RISCV/attributes.ll | 4 ++++ llvm/test/MC/RISCV/attribute-arch.s | 3 +++ llvm/unittests/TargetParser/RISCVISAInfoTest.cpp | 1 + 8 files changed, 24 insertions(+), 1 deletion(-) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index fc8a9c04667b6ba..4d93144724dffb4 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -147,6 +147,7 @@ // CHECK-NEXT: svinval 1.0 'Svinval' (Fine-Grained Address-Translation Cache Invalidation) // CHECK-NEXT: svnapot 1.0 'Svnapot' (NAPOT Translation Contiguity) // CHECK-NEXT: svpbmt 1.0 'Svpbmt' (Page-Based Memory Types) +// CHECK-NEXT: svvptc 1.0 'svvptc' (Obviating Memory-Management Instructions after Marking PTEs Valid) // CHECK-NEXT: xcvalu 1.0 'XCValu' (CORE-V ALU Operations) // CHECK-NEXT: xcvbi 1.0 'XCVbi' (CORE-V Immediate Branching) // CHECK-NEXT: xcvbitmanip 1.0 'XCVbitmanip' (CORE-V Bit Manipulation) diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c index 6e586714af84d3c..597325ffa5e4eef 100644 --- a/clang/test/Preprocessor/riscv-target-features.c +++ b/clang/test/Preprocessor/riscv-target-features.c @@ -52,6 +52,7 @@ // CHECK-NOT: __riscv_svinval {{.*$}} // CHECK-NOT: __riscv_svnapot {{.*$}} // CHECK-NOT: __riscv_svpbmt {{.*$}} +// CHECK-NOT: __riscv_svvptc 
{{.*$}} // CHECK-NOT: __riscv_v {{.*$}} // CHECK-NOT: __riscv_v_elen {{.*$}} // CHECK-NOT: __riscv_v_elen_fp {{.*$}} @@ -516,6 +517,14 @@ // RUN: -o - | FileCheck --check-prefix=CHECK-SVPBMT-EXT %s // CHECK-SVPBMT-EXT: __riscv_svpbmt 1000000{{$}} +// RUN: %clang --target=riscv32-unknown-linux-gnu \ +// RUN: -march=rv32isvvptc -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-SVVPTC-EXT %s +// RUN: %clang --target=riscv64-unknown-linux-gnu \ +// RUN: -march=rv64isvvptc -E -dM %s \ +// RUN: -o - | FileCheck --check-prefix=CHECK-SVVPTC-EXT %s +// CHECK-SVVPTC-EXT: __riscv_svvptc 1000000{{$}} + // RUN: %clang --target=riscv32-unknown-linux-gnu \ // RUN: -march=rv32iv1p0 -E -dM %s \ // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 6075a2289d473de..dae6f7c46cc48c6 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -156,6 +156,7 @@ on support follow. ``Svinval`` Assembly Support ``Svnapot`` Assembly Support ``Svpbmt`` Supported + ``Svvptc`` Supported ``V`` Supported ``Za128rs`` Supported (`See note <#riscv-profiles-extensions-note>`__) ``Za64rs`` Supported (`See note <#riscv-profiles-extensions-note>`__) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index f7215279940d693..22a32d110855d55 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -178,7 +178,7 @@ Changes to the RISC-V Backend means Zve32x and Zve32f will also require Zvl64b. The prior support was largely untested. * The `Zvbc32e` and `Zvkgs` extensions are now supported experimentally. -* Added `Smctr` and `Ssctr` extensions. +* Added `Smctr`, `Ssctr` and `Svvptc` extensions. * `-mcpu=syntacore-scr7` was added. * The `Zacas` extension is no longer marked as experimental. 
* The `Smmpm`, `Smnpm`, `Ssnpm`, `Supm`, and `Sspm` pointer masking extensions diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 559f0e5950edddb..608782d7839a9fa 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1029,6 +1029,10 @@ def FeatureStdExtSvpbmt : RISCVExtension<"svpbmt", 1, 0, "'Svpbmt' (Page-Based Memory Types)">; +def FeatureStdExtSvvptc + : RISCVExtension<"svvptc", 1, 0, + "'svvptc' (Obviating Memory-Management Instructions after Marking PTEs Valid)">; + def FeatureStdExtSha : RISCVExtension<"sha", 1, 0, "'Sha' (Augmented Hypervisor)", diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 5c835befd6f5050..e5b308a172661c4 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -62,6 +62,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+svbare %s -o - | FileCheck --check-prefixes=CHECK,RV32SVBARE %s ; RUN: llc -mtriple=riscv32 -mattr=+svnapot %s -o - | FileCheck --check-prefixes=CHECK,RV32SVNAPOT %s ; RUN: llc -mtriple=riscv32 -mattr=+svpbmt %s -o - | FileCheck --check-prefixes=CHECK,RV32SVPBMT %s +; RUN: llc -mtriple=riscv32 -mattr=+svvptc %s -o - | FileCheck --check-prefixes=CHECK,RV32SVVPTC %s ; RUN: llc -mtriple=riscv32 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV32SVINVAL %s ; RUN: llc -mtriple=riscv32 -mattr=+xcvalu %s -o - | FileCheck --check-prefix=RV32XCVALU %s ; RUN: llc -mtriple=riscv32 -mattr=+xcvbitmanip %s -o - | FileCheck --check-prefix=RV32XCVBITMANIP %s @@ -202,6 +203,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+svbare %s -o - | FileCheck --check-prefixes=CHECK,RV64SVBARE %s ; RUN: llc -mtriple=riscv64 -mattr=+svnapot %s -o - | FileCheck --check-prefixes=CHECK,RV64SVNAPOT %s ; RUN: llc -mtriple=riscv64 -mattr=+svpbmt %s -o - | FileCheck --check-prefixes=CHECK,RV64SVPBMT %s +; RUN: llc -mtriple=riscv64 -mattr=+svvptc %s -o - | FileCheck 
--check-prefixes=CHECK,RV64SVVPTC %s ; RUN: llc -mtriple=riscv64 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV64SVINVAL %s ; RUN: llc -mtriple=riscv64 -mattr=+xventanacondops %s -o - | FileCheck --check-prefixes=CHECK,RV64XVENTANACONDOPS %s ; RUN: llc -mtriple=riscv64 -mattr=+xsfvfwmaccqqq %s -o - | FileCheck --check-prefix=RV64XSFVFWMACCQQQ %s @@ -358,6 +360,7 @@ ; RV32SVBARE: .attribute 5, "rv32i2p1_svbare1p0" ; RV32SVNAPOT: .attribute 5, "rv32i2p1_svnapot1p0" ; RV32SVPBMT: .attribute 5, "rv32i2p1_svpbmt1p0" +; RV32SVVPTC: .attribute 5, "rv32i2p1_svvptc1p0" ; RV32SVINVAL: .attribute 5, "rv32i2p1_svinval1p0" ; RV32XCVALU: .attribute 5, "rv32i2p1_xcvalu1p0" ; RV32XCVBITMANIP: .attribute 5, "rv32i2p1_xcvbitmanip1p0" @@ -500,6 +503,7 @@ ; RV64SVBARE: .attribute 5, "rv64i2p1_svbare1p0" ; RV64SVNAPOT: .attribute 5, "rv64i2p1_svnapot1p0" ; RV64SVPBMT: .attribute 5, "rv64i2p1_svpbmt1p0" +; RV64SVVPTC: .attribute 5, "rv64i2p1_svvptc1p0" ; RV64SVINVAL: .attribute 5, "rv64i2p1_svinval1p0" ; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p1_xventanacondops1p0" ; RV64XSFVFWMACCQQQ: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0" diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 2332bcacc99d06b..a744a660a7076f6 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -375,6 +375,9 @@ .attribute arch, "rv32i_svbare1p0" # CHECK: attribute 5, "rv32i2p1_svbare1p0" +.attribute arch, "rv32i_svvptc1p0" +# CHECK: attribute 5, "rv32i2p1_svvptc1p0" + .attribute arch, "rv32i_zfbfmin1p0" # CHECK: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfbfmin1p0" diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index e402b1a40de34d4..30b5f3ce3cb084e 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -1092,6 +1092,7 @@ R"(All available 
-march extensions for RISC-V svinval 1.0 svnapot 1.0 svpbmt 1.0 + svvptc 1.0 xcvalu 1.0 xcvbi 1.0 xcvbitmanip 1.0 From 233e64d8e4211e6c3c94a986c28f9993da2b8de0 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 28 Oct 2024 11:56:40 +0000 Subject: [PATCH 161/425] [gn build] Port 0be1883c36fc --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 0586704850a51b2..1b73f37b0b1b8a5 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -430,6 +430,8 @@ if (current_toolchain == default_toolchain) { "__filesystem/recursive_directory_iterator.h", "__filesystem/space_info.h", "__filesystem/u8path.h", + "__flat_map/flat_map.h", + "__flat_map/sorted_unique.h", "__format/buffer.h", "__format/concepts.h", "__format/container_adaptor.h", From cbaecb5d299c48ea23319704445c1ee8405f4c04 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 28 Oct 2024 11:56:42 +0000 Subject: [PATCH 162/425] [gn build] Port 3d6923dbac16 --- .../secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn | 1 + .../gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn index f23bccf9d511856..61e4f8da3c04def 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn @@ -56,6 +56,7 @@ static_library("bugprone") { "MultipleStatementMacroCheck.cpp", "NoEscapeCheck.cpp", "NonZeroEnumToBoolConversionCheck.cpp", + "NondeterministicPointerIterationOrderCheck.cpp", "NotNullTerminatedResultCheck.cpp", "OptionalValueConversionCheck.cpp", "ParentVirtualCallCheck.cpp", diff --git 
a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn index 7a6c360e88c14e8..fe211726eb5d59d 100644 --- a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn @@ -99,8 +99,6 @@ static_library("Checkers") { "ObjCUnusedIVarsChecker.cpp", "PaddingChecker.cpp", "PointerArithChecker.cpp", - "PointerIterationChecker.cpp", - "PointerSortingChecker.cpp", "PointerSubChecker.cpp", "PthreadLockChecker.cpp", "PutenvStackArrayChecker.cpp", @@ -141,8 +139,8 @@ static_library("Checkers") { "VforkChecker.cpp", "VirtualCallChecker.cpp", "WebKit/ASTUtils.cpp", - "WebKit/RawPtrRefMemberChecker.cpp", "WebKit/PtrTypesSemantics.cpp", + "WebKit/RawPtrRefMemberChecker.cpp", "WebKit/RefCntblBaseVirtualDtorChecker.cpp", "WebKit/UncountedCallArgsChecker.cpp", "WebKit/UncountedLambdaCapturesChecker.cpp", From 595ec4e4be7de4258dd346e606b4a03a5eb0ce8a Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 28 Oct 2024 11:56:42 +0000 Subject: [PATCH 163/425] [gn build] Port 5aa1275d03b6 --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn index 0a97bcf59112b42..b47189accd13651 100644 --- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -293,6 +293,7 @@ copy("Headers") { "shaintrin.h", "sifive_vector.h", "sm3intrin.h", + "sm4evexintrin.h", "sm4intrin.h", "smmintrin.h", "stdalign.h", From 2a4bab3fac64833321287d9ba17dcad5255d0ab4 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 28 Oct 2024 11:56:43 +0000 Subject: [PATCH 164/425] [gn build] Port cfde4fbccf5d --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn 
b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 1b73f37b0b1b8a5..28d41cda5aa8f43 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -571,7 +571,6 @@ if (current_toolchain == default_toolchain) { "__locale_dir/locale_base_api/fuchsia.h", "__locale_dir/locale_base_api/ibm.h", "__locale_dir/locale_base_api/musl.h", - "__locale_dir/locale_base_api/newlib.h", "__locale_dir/locale_base_api/openbsd.h", "__locale_dir/locale_base_api/win32.h", "__locale_dir/locale_guard.h", From 1164bd774783f33419e56cf10fdb87fbeab2b185 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 28 Oct 2024 11:56:44 +0000 Subject: [PATCH 165/425] [gn build] Port e146c1867e8d --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 28d41cda5aa8f43..8121e34dcf6eff7 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -566,8 +566,10 @@ if (current_toolchain == default_toolchain) { "__locale", "__locale_dir/locale_base_api.h", "__locale_dir/locale_base_api/android.h", + "__locale_dir/locale_base_api/apple.h", "__locale_dir/locale_base_api/bsd_locale_defaults.h", "__locale_dir/locale_base_api/bsd_locale_fallbacks.h", + "__locale_dir/locale_base_api/freebsd.h", "__locale_dir/locale_base_api/fuchsia.h", "__locale_dir/locale_base_api/ibm.h", "__locale_dir/locale_base_api/musl.h", From ba7555e640ea7fe341e19e0c1fffe5960b7c0d5b Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Mon, 28 Oct 2024 12:56:47 +0000 Subject: [PATCH 166/425] [RISCV] Mark the RVA23S64 and RVA23U64 profiles as non-experimental (#113826) All of the extensions used by these profile are themselves non-experimental, and RVA23 was just ratified . 
We lack a way of expressing `Ss1p13` (supervisor architecture 1.13), but this is a problem we have for RVA22 (Ss1p12) and RVA20 (Ss1p11) so I don't feel it's a blocker. --- clang/test/Driver/print-supported-extensions-riscv.c | 4 ++-- clang/test/Driver/riscv-profiles.c | 10 +++++----- llvm/docs/RISCVUsage.rst | 4 ++-- llvm/docs/ReleaseNotes.md | 1 + llvm/lib/Target/RISCV/RISCVProfiles.td | 4 ++-- llvm/test/CodeGen/RISCV/attributes.ll | 4 ++-- llvm/unittests/TargetParser/RISCVISAInfoTest.cpp | 8 ++++---- 7 files changed, 18 insertions(+), 17 deletions(-) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index 4d93144724dffb4..e39847b9c31a8ed 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -191,12 +191,12 @@ // CHECK-NEXT: rva20u64 // CHECK-NEXT: rva22s64 // CHECK-NEXT: rva22u64 +// CHECK-NEXT: rva23s64 +// CHECK-NEXT: rva23u64 // CHECK-NEXT: rvi20u32 // CHECK-NEXT: rvi20u64 // CHECK-EMPTY: // CHECK-NEXT: Experimental Profiles -// CHECK-NEXT: rva23s64 -// CHECK-NEXT: rva23u64 // CHECK-NEXT: rvb23s64 // CHECK-NEXT: rvb23u64 // CHECK-NEXT: rvm23u32 diff --git a/clang/test/Driver/riscv-profiles.c b/clang/test/Driver/riscv-profiles.c index 42e23cf57c880f0..d85ac8baf4edd96 100644 --- a/clang/test/Driver/riscv-profiles.c +++ b/clang/test/Driver/riscv-profiles.c @@ -111,7 +111,7 @@ // RVA22S64: "-target-feature" "+svinval" // RVA22S64: "-target-feature" "+svpbmt" -// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23u64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23u64 \ // RUN: | FileCheck -check-prefix=RVA23U64 %s // RVA23U64: "-target-feature" "+m" // RVA23U64: "-target-feature" "+a" @@ -148,7 +148,7 @@ // RVA23U64: "-target-feature" "+zvfhmin" // RVA23U64: "-target-feature" "+zvkt" -// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23s64 
-menable-experimental-extensions \ +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23s64 \ // RUN: | FileCheck -check-prefix=RVA23S64 %s // RVA23S64: "-target-feature" "+m" // RVA23S64: "-target-feature" "+a" @@ -323,6 +323,6 @@ // RUN: not %clang --target=riscv64 -### -c %s 2>&1 -march=rva22u64zfa | FileCheck -check-prefix=INVALID-ADDITIONAL %s // INVALID-ADDITIONAL: error: invalid arch name 'rva22u64zfa', additional extensions must be after separator '_' -// RUN: not %clang --target=riscv64 -### -c %s 2>&1 -march=rva23u64 | FileCheck -check-prefix=EXPERIMENTAL-NOFLAG %s -// EXPERIMENTAL-NOFLAG: error: invalid arch name 'rva23u64' -// EXPERIMENTAL-NOFLAG: requires '-menable-experimental-extensions' for profile 'rva23u64' +// RUN: not %clang --target=riscv32 -### -c %s 2>&1 -march=rvm23u32 | FileCheck -check-prefix=EXPERIMENTAL-NOFLAG %s +// EXPERIMENTAL-NOFLAG: error: invalid arch name 'rvm23u32' +// EXPERIMENTAL-NOFLAG: requires '-menable-experimental-extensions' for profile 'rvm23u32' diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index dae6f7c46cc48c6..04f2c357766d448 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -82,6 +82,8 @@ ISA naming string. Currently supported profiles: * ``rva20s64`` * ``rva22u64`` * ``rva22s64`` +* ``rva23u64`` +* ``rva23s64`` Note that you can also append additional extension names to be enabled, e.g. ``rva20u64_zicond`` will enable the ``zicond`` extension in addition to those @@ -91,8 +93,6 @@ Profiles that are not yet ratified cannot be used unless ``-menable-experimental-extensions`` (or equivalent for other tools) is specified. 
This applies to the following profiles: -* ``rva23u64`` -* ``rva23s64`` * ``rvb23u64`` * ``rvb23s64`` * ``rvm23u32`` diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 22a32d110855d55..ac7a795daf791ab 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -184,6 +184,7 @@ Changes to the RISC-V Backend * The `Smmpm`, `Smnpm`, `Ssnpm`, `Supm`, and `Sspm` pointer masking extensions are no longer marked as experimental. * The `Sha` extension is now supported. +* The RVA23U64 and RVA23S64 profiles are no longer marked as experimental. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVProfiles.td b/llvm/lib/Target/RISCV/RISCVProfiles.td index 78c076fdb0b26ed..ea0fe08abd7a147 100644 --- a/llvm/lib/Target/RISCV/RISCVProfiles.td +++ b/llvm/lib/Target/RISCV/RISCVProfiles.td @@ -161,8 +161,8 @@ def RVA20U64 : RISCVProfile<"rva20u64", RVA20U64Features>; def RVA20S64 : RISCVProfile<"rva20s64", RVA20S64Features>; def RVA22U64 : RISCVProfile<"rva22u64", RVA22U64Features>; def RVA22S64 : RISCVProfile<"rva22s64", RVA22S64Features>; -def RVA23U64 : RISCVExperimentalProfile<"rva23u64", RVA23U64Features>; -def RVA23S64 : RISCVExperimentalProfile<"rva23s64", RVA23S64Features>; +def RVA23U64 : RISCVProfile<"rva23u64", RVA23U64Features>; +def RVA23S64 : RISCVProfile<"rva23s64", RVA23S64Features>; def RVB23U64 : RISCVExperimentalProfile<"rvb23u64", RVB23U64Features>; def RVB23S64 : RISCVExperimentalProfile<"rvb23s64", RVB23S64Features>; def RVM23U32 : RISCVExperimentalProfile<"rvm23u32", RVM23U32Features>; diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index e5b308a172661c4..2545c7075e4cf55 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -291,8 +291,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+rva20s64 %s -o - | FileCheck --check-prefix=RVA20S64 %s ; RUN: llc -mtriple=riscv64 -mattr=+rva22u64 %s -o - 
| FileCheck --check-prefix=RVA22U64 %s ; RUN: llc -mtriple=riscv64 -mattr=+rva22s64 %s -o - | FileCheck --check-prefix=RVA22S64 %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-rva23u64 %s -o - | FileCheck --check-prefix=RVA23U64 %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-rva23s64 %s -o - | FileCheck --check-prefix=RVA23S64 %s +; RUN: llc -mtriple=riscv64 -mattr=+rva23u64 %s -o - | FileCheck --check-prefix=RVA23U64 %s +; RUN: llc -mtriple=riscv64 -mattr=+rva23s64 %s -o - | FileCheck --check-prefix=RVA23S64 %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-rvb23u64 %s -o - | FileCheck --check-prefix=RVB23U64 %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-rvb23s64 %s -o - | FileCheck --check-prefix=RVB23S64 %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-rvm23u32 %s -o - | FileCheck --check-prefix=RVM23U32 %s diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 30b5f3ce3cb084e..48792ad0265fc46 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -774,8 +774,8 @@ TEST(ParseArchString, TEST(ParseArchString, RejectsExperimentalProfilesIfEnableExperimentalExtensionsNotSet) { EXPECT_EQ( - toString(RISCVISAInfo::parseArchString("rva23u64", false).takeError()), - "requires '-menable-experimental-extensions' for profile 'rva23u64'"); + toString(RISCVISAInfo::parseArchString("rvm23u32", false).takeError()), + "requires '-menable-experimental-extensions' for profile 'rvm23u32'"); } TEST(ToFeatures, IIsDroppedAndExperimentalExtensionsArePrefixed) { @@ -1136,12 +1136,12 @@ Supported Profiles rva20u64 rva22s64 rva22u64 + rva23s64 + rva23u64 rvi20u32 rvi20u64 Experimental Profiles - rva23s64 - rva23u64 rvb23s64 rvb23u64 rvm23u32 From ddd463be7edc3f3d03a6e88917fff66ef84210e9 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Mon, 28 Oct 2024 13:01:22 +0000 Subject: [PATCH 167/425] [AArch64] Add getStreamingHazardSize() to 
AArch64Subtarget (#113679) This is defined by the `-aarch64-streaming-hazard-size` option or its alias `-aarch64-stack-hazard-size` (the original name). It has been renamed to be more general as this option will (for the time being) be used to detect if the current target has streaming mode memory hazards. --------- Co-authored-by: Hari Limaye --- .../Target/AArch64/AArch64FrameLowering.cpp | 30 +++++++++++-------- llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 11 +++++++ llvm/lib/Target/AArch64/AArch64Subtarget.h | 5 ++++ 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 275070b332ac87f..dfaa36f7f512d80 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -126,14 +126,15 @@ // and the SME unit try to access the same area of memory, including if the // access is to an area of the stack. To try to alleviate this we attempt to // introduce extra padding into the stack frame between FP and GPR accesses, -// controlled by the StackHazardSize option. Without changing the layout of the -// stack frame in the diagram above, a stack object of size StackHazardSize is -// added between GPR and FPR CSRs. Another is added to the stack objects -// section, and stack objects are sorted so that FPR > Hazard padding slot > -// GPRs (where possible). Unfortunately some things are not handled well (VLA -// area, arguments on the stack, object with both GPR and FPR accesses), but if -// those are controlled by the user then the entire stack frame becomes GPR at -// the start/end with FPR in the middle, surrounded by Hazard padding. +// controlled by the aarch64-stack-hazard-size option. Without changing the +// layout of the stack frame in the diagram above, a stack object of size +// aarch64-stack-hazard-size is added between GPR and FPR CSRs. 
Another is added +// to the stack objects section, and stack objects are sorted so that FPR > +// Hazard padding slot > GPRs (where possible). Unfortunately some things are +// not handled well (VLA area, arguments on the stack, objects with both GPR and +// FPR accesses), but if those are controlled by the user then the entire stack +// frame becomes GPR at the start/end with FPR in the middle, surrounded by +// Hazard padding. // // An example of the prologue: // @@ -273,9 +274,6 @@ cl::opt EnableHomogeneousPrologEpilog( cl::desc("Emit homogeneous prologue and epilogue for the size " "optimization (default = off)")); -// Stack hazard padding size. 0 = disabled. -static cl::opt StackHazardSize("aarch64-stack-hazard-size", - cl::init(0), cl::Hidden); // Stack hazard size for analysis remarks. StackHazardSize takes precedence. static cl::opt StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0), @@ -1617,6 +1615,10 @@ static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } +static unsigned getStackHazardSize(const MachineFunction &MF) { + return MF.getSubtarget().getStreamingHazardSize(); +} + // Convenience function to determine whether I is an SVE callee save. static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { switch (I->getOpcode()) { @@ -2988,6 +2990,7 @@ static void computeCalleeSaveRegisterPairs( bool IsWindows = isTargetWindows(MF); bool NeedsWinCFI = needsWinCFI(MF); AArch64FunctionInfo *AFI = MF.getInfo(); + unsigned StackHazardSize = getStackHazardSize(MF); MachineFrameInfo &MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction().getCallingConv(); unsigned Count = CSI.size(); @@ -3615,6 +3618,7 @@ static std::optional getLdStFrameID(const MachineInstr &MI, // which can be used to determine if any hazard padding is needed. 
void AArch64FrameLowering::determineStackHazardSlot( MachineFunction &MF, BitVector &SavedRegs) const { + unsigned StackHazardSize = getStackHazardSize(MF); if (StackHazardSize == 0 || StackHazardSize % 16 != 0 || MF.getInfo()->hasStackHazardSlotIndex()) return; @@ -3805,7 +3809,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // StackHazardSize if so. determineStackHazardSlot(MF, SavedRegs); if (AFI->hasStackHazardSlotIndex()) - CSStackSize += StackHazardSize; + CSStackSize += getStackHazardSize(MF); // Save number of saved regs, so we can easily update CSStackSize later. unsigned NumSavedRegs = SavedRegs.count(); @@ -3920,6 +3924,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( std::vector &CSI, unsigned &MinCSFrameIndex, unsigned &MaxCSFrameIndex) const { bool NeedsWinCFI = needsWinCFI(MF); + unsigned StackHazardSize = getStackHazardSize(MF); // To match the canonical windows frame layout, reverse the list of // callee saved registers to get them laid out by PrologEpilogInserter // in the right order. (PrologEpilogInserter allocates stack objects top @@ -5154,6 +5159,7 @@ void AArch64FrameLowering::emitRemarks( if (Attrs.hasNonStreamingInterfaceAndBody()) return; + unsigned StackHazardSize = getStackHazardSize(MF); const uint64_t HazardSize = (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 32db1e8c2477a85..7fb2a961e0313d3 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -76,6 +76,16 @@ static cl::opt AArch64MinimumJumpTableEntries( "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on AArch64")); +static cl::opt AArch64StreamingHazardSize( + "aarch64-streaming-hazard-size", + cl::desc("Hazard size for streaming mode memory accesses. 
0 = disabled."), + cl::init(0), cl::Hidden); + +static cl::alias AArch64StreamingStackHazardSize( + "aarch64-stack-hazard-size", + cl::desc("alias for -aarch64-streaming-hazard-size"), + cl::aliasopt(AArch64StreamingHazardSize)); + unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0) return OverrideVectorInsertExtractBaseCost; @@ -333,6 +343,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), IsLittle(LittleEndian), IsStreaming(IsStreaming), IsStreamingCompatible(IsStreamingCompatible), + StreamingHazardSize(AArch64StreamingHazardSize), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)), diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 9856415361e50d7..50adb7cbf69a872 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -84,6 +84,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool IsStreaming; bool IsStreamingCompatible; + unsigned StreamingHazardSize; unsigned MinSVEVectorSizeInBits; unsigned MaxSVEVectorSizeInBits; unsigned VScaleForTuning = 2; @@ -172,6 +173,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { /// Returns true if the function has a streaming-compatible body. bool isStreamingCompatible() const { return IsStreamingCompatible; } + /// Returns the size of memory region that if accessed by both the CPU and + /// the SME unit could result in a hazard. 0 = disabled. + unsigned getStreamingHazardSize() const { return StreamingHazardSize; } + /// Returns true if the target has NEON and the function at runtime is known /// to have NEON enabled (e.g. 
the function is known not to be in streaming-SVE /// mode, which disables NEON instructions). From 9090430d4176fa260b8da46b7b983b3760d452be Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Mon, 28 Oct 2024 14:28:07 +0100 Subject: [PATCH 168/425] Add clang::lifetimebound annotation to StringRef constructors. (#113878) Adding the lifetimebound annotation to the StringRef constructors enables us to detect use-after-free issues such as the following: ``` llvm::StringRef TestZoneName() { char test[] = "foo"; // oops, missing static return test; // use-after-free. } ``` See #113533 --- llvm/include/llvm/ADT/StringRef.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index f879bbf7164fd6c..0dcd4d90086eff5 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -81,7 +81,7 @@ namespace llvm { StringRef(std::nullptr_t) = delete; /// Construct a string ref from a cstring. - /*implicit*/ constexpr StringRef(const char *Str) + /*implicit*/ constexpr StringRef(const char *Str LLVM_LIFETIME_BOUND) : View(Str, Str ? // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 @@ -93,7 +93,8 @@ namespace llvm { } /// Construct a string ref from a pointer and length. - /*implicit*/ constexpr StringRef(const char *data, size_t length) + /*implicit*/ constexpr StringRef(const char *data LLVM_LIFETIME_BOUND, + size_t length) : View(data, length) {} /// Construct a string ref from an std::string. From b1ede8fcb45d91092f5afe6c88d7a548f14ed848 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Mon, 28 Oct 2024 14:43:10 +0100 Subject: [PATCH 169/425] [reland][libc][bazel] Enable software prefetching for memcpy (#113886) This will affect only Bazel configuration for now.
This is a reland of #108939 which has been reverted because of codegen issues fixed by https://github.com/llvm/llvm-project/pull/113161. --- .../bazel/llvm-project-overlay/libc/libc_configure_options.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl index f65da9e98226b62..96d7fa86e9ddf25 100644 --- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl +++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl @@ -24,7 +24,7 @@ LIBC_CONFIGURE_OPTIONS = [ # Documentation in libc/src/string/memory_utils/... # "LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY", # "LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE", - # "LIBC_COPT_MEMCPY_X86_USE_SOFTWARE_PREFETCHING", + "LIBC_COPT_MEMCPY_X86_USE_SOFTWARE_PREFETCHING", "LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING", # Documentation in libc/docs/dev/printf_behavior.rst From ce0368eb8417f2d369499bb98b1f0ccbe2219598 Mon Sep 17 00:00:00 2001 From: SpencerAbson Date: Mon, 28 Oct 2024 13:55:16 +0000 Subject: [PATCH 170/425] [AArch64] Add assembly/disassembly for PMLAL/PMULL instructions (#113564) This patch adds assembly/disassembly for the following SVE_AES2 instructions - PMLAL - PMULL - In accordance with: https://developer.arm.com/documentation/ddi0602/latest/ --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 4 ++ llvm/lib/Target/AArch64/SVEInstrFormats.td | 40 +++++++++++++++++++ .../MC/AArch64/SVE2p1/pmlal-diagnostics.s | 37 +++++++++++++++++ llvm/test/MC/AArch64/SVE2p1/pmlal.s | 33 +++++++++++++++ .../MC/AArch64/SVE2p1/pmull-diagnostics.s | 37 +++++++++++++++++ llvm/test/MC/AArch64/SVE2p1/pmull.s | 33 +++++++++++++++ 6 files changed, 184 insertions(+) create mode 100644 llvm/test/MC/AArch64/SVE2p1/pmlal-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/pmlal.s create mode 100644 llvm/test/MC/AArch64/SVE2p1/pmull-diagnostics.s create mode 100644 
llvm/test/MC/AArch64/SVE2p1/pmull.s diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index dc96b249c4e40ce..11c64df2eb9278e 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3928,6 +3928,10 @@ let Predicates = [HasSVEAES2, HasSVE2p1orSSVE_AES] in { def AESD_4ZZI_B : sve_crypto_binary_multi4<0b0100, "aesd">; def AESEMC_4ZZI_B : sve_crypto_binary_multi4<0b1000, "aesemc">; def AESDMIC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">; + + // SVE_AES2 multi-vector polynomial multiply + def PMLAL_2ZZZ_Q : sve_crypto_pmlal_multi<"pmlal">; + def PMULL_2ZZZ_Q : sve_crypto_pmull_multi<"pmull">; } // End HasSVEAES2, HasSVE2p1orSSVE_AES //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 02ee0fe9244572c..72cbad17bc049f6 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -8879,6 +8879,46 @@ class sve_crypto_binary_multi4 opc, string asm> let hasSideEffects = 0; } +class sve_crypto_pmlal_multi +: I<(outs ZZ_q_mul_r:$Zda), + (ins ZZ_q_mul_r:$_Zda, ZPR64:$Zn, ZPR64:$Zm), + asm, + "\t$Zda, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zm; + bits<5> Zn; + bits<4> Zda; + let Inst{31-21} = 0b01000101001; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b111111; + let Inst{9-5} = Zn; + let Inst{4-1} = Zda; + let Inst{0} = 0b0; + + let Constraints = "$Zda = $_Zda"; + let hasSideEffects = 0; +} + +class sve_crypto_pmull_multi +: I<(outs ZZ_q_mul_r:$Zd), + (ins ZPR64:$Zn, ZPR64:$Zm), + asm, + "\t$Zd, $Zn, $Zm", + "", + []>, Sched<[]> { + bits<5> Zm; + bits<5> Zn; + bits<4> Zd; + let Inst{31-21} = 0b01000101001; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b111110; + let Inst{9-5} = Zn; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; + let hasSideEffects = 0; +} + 
//===----------------------------------------------------------------------===// // SVE BFloat16 Group //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AArch64/SVE2p1/pmlal-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/pmlal-diagnostics.s new file mode 100644 index 000000000000000..61c2b6eff969d74 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/pmlal-diagnostics.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+sve-aes2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +pmlal {z0.q-z2.q}, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: pmlal {z0.q-z2.q}, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pmlal {z0.q-z0.q}, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: pmlal {z0.q-z0.q}, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pmlal {z1.q-z2.q}, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: pmlal {z1.q-z2.q}, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pmlal {z0.d-z1.d}, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: pmlal {z0.d-z1.d}, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single source vectors + +pmlal {z0.q-z1.q}, z0.s, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: pmlal {z0.q-z1.q}, z0.s, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pmlal {z0.q-z1.q}, z0.d, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: pmlal {z0.q-z1.q}, z0.d, z0.s +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p1/pmlal.s b/llvm/test/MC/AArch64/SVE2p1/pmlal.s new file mode 100644 index 000000000000000..0420b230956c08b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/pmlal.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+ssve-aes < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve-aes2,+sve2p1 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve-aes2,+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve-aes2,+sve2p1 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve-aes2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve-aes2,+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +pmlal {z0.q-z1.q}, z0.d, z0.d // 01000101-00100000-11111100-00000000 +// CHECK-INST: pmlal { z0.q, z1.q }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xfc,0x20,0x45] +// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2 +// CHECK-UNKNOWN: 4520fc00 + +pmlal {z22.q-z23.q}, z13.d, z8.d // 01000101-00101000-11111101-10110110 +// CHECK-INST: pmlal { z22.q, z23.q }, z13.d, z8.d +// CHECK-ENCODING: [0xb6,0xfd,0x28,0x45] +// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2 +// CHECK-UNKNOWN: 4528fdb6 + +pmlal {z30.q-z31.q}, z31.d, z31.d // 01000101-00111111-11111111-11111110 +// CHECK-INST: pmlal { z30.q, z31.q }, z31.d, z31.d +// CHECK-ENCODING: [0xfe,0xff,0x3f,0x45] +// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2 +// CHECK-UNKNOWN: 453ffffe \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p1/pmull-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/pmull-diagnostics.s new file mode 100644 index 000000000000000..3aaef0cddf4a070 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/pmull-diagnostics.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+sve-aes2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +pmull {z0.q-z2.q}, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: pmull {z0.q-z2.q}, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pmull {z0.q-z0.q}, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: pmull {z0.q-z0.q}, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pmull {z1.q-z2.q}, z0.d, z0.d +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: pmull {z1.q-z2.q}, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pmull {z0.d-z1.d}, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: pmull {z0.d-z1.d}, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single source vectors + +pmull {z0.q-z1.q}, z0.s, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: pmull {z0.q-z1.q}, z0.s, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pmull {z0.q-z1.q}, z0.d, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: pmull {z0.q-z1.q}, z0.d, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p1/pmull.s b/llvm/test/MC/AArch64/SVE2p1/pmull.s new file mode 100644 index 000000000000000..9c3ee16401c1215 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/pmull.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+ssve-aes < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve-aes2,+sve2p1 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve-aes2,+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve-aes2,+sve2p1 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve-aes2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding 
(-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+sve2p1 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve-aes2,+sve2p1 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +pmull {z0.q-z1.q}, z0.d, z0.d // 01000101-00100000-11111000-00000000 +// CHECK-INST: pmull { z0.q, z1.q }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xf8,0x20,0x45] +// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2 +// CHECK-UNKNOWN: 4520f800 + +pmull {z22.q-z23.q}, z13.d, z8.d // 01000101-00101000-11111001-10110110 +// CHECK-INST: pmull { z22.q, z23.q }, z13.d, z8.d +// CHECK-ENCODING: [0xb6,0xf9,0x28,0x45] +// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2 +// CHECK-UNKNOWN: 4528f9b6 + +pmull {z30.q-z31.q}, z31.d, z31.d // 01000101-00111111-11111011-11111110 +// CHECK-INST: pmull { z30.q, z31.q }, z31.d, z31.d +// CHECK-ENCODING: [0xfe,0xfb,0x3f,0x45] +// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2 +// CHECK-UNKNOWN: 453ffbfe \ No newline at end of file From 7152bf3bc805b8d9b1873058ab0a084d7b6079d6 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 28 Oct 2024 06:37:24 -0700 Subject: [PATCH 171/425] [SLP]Do not create new vector node if scalars fully overlap with the existing one If the list of scalars is already vectorized as part of the same vector node, there is no need to generate the vector node again; it will be handled as part of overlapping matching.
Fixes #113810 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 9 +- .../full-overlap-non-schedulable.ll | 93 +++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2afd02dae3a8b8a..268546fe99e1383 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7947,8 +7947,13 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Nodes.insert(E); SmallPtrSet Values(VL.begin(), VL.end()); if (any_of(Nodes, [&](const TreeEntry *E) { - return all_of(E->Scalars, - [&](Value *V) { return Values.contains(V); }); + if (all_of(E->Scalars, + [&](Value *V) { return Values.contains(V); })) + return true; + SmallPtrSet EValues(E->Scalars.begin(), + E->Scalars.end()); + return ( + all_of(VL, [&](Value *V) { return EValues.contains(V); })); })) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n"); if (TryToFindDuplicates(S)) diff --git a/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll new file mode 100644 index 000000000000000..dbd91199c24ecd1 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s + +define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[TOP:.*:]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = 
shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr> [[TMP4]], <4 x i64> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 2 +; CHECK-NEXT: br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]] +; CHECK: [[L41]]: +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x ptr> [[TMP5]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> zeroinitializer, <4 x i32> [[TMP8]] +; CHECK-NEXT: br label %[[L112:.*]] +; CHECK: [[L42]]: +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[DOTNOT280:%.*]] = icmp eq i32 [[TMP10]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> , i32 [[TMP1]], i32 2 +; CHECK-NEXT: br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]] +; CHECK: [[L47]]: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 1 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x ptr> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> zeroinitializer, <2 x i32> [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> , i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP18]], <2 x i32> [[TMP17]], i64 2) +; CHECK-NEXT: br label %[[L112]] +; CHECK: [[L112]]: +; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ [[TMP11]], %[[L42]] ] +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0 +; CHECK-NEXT: store i32 [[TMP21]], ptr [[P2]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1 +; CHECK-NEXT: store i32 [[TMP22]], ptr 
[[P1]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2 +; CHECK-NEXT: store i32 [[TMP23]], ptr [[P2]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3 +; CHECK-NEXT: store i32 [[TMP24]], ptr [[P1]], align 4 +; CHECK-NEXT: ret void +; +top: + %2 = getelementptr i8, ptr %0, i64 8 + %3 = getelementptr i8, ptr %0, i64 12 + %4 = getelementptr i8, ptr %0, i64 16 + %5 = getelementptr i8, ptr %0, i64 20 + br i1 %c1, label %L42, label %L41 + +L41: + %.not276 = icmp eq ptr %2, null + %6 = load i32, ptr %2, align 4 + %7 = select i1 %.not276, i32 0, i32 %6 + %.not277 = icmp eq ptr %3, null + %8 = load i32, ptr %3, align 4 + %9 = select i1 %.not277, i32 0, i32 %8 + %.not278 = icmp eq ptr %4, null + %10 = load i32, ptr %4, align 4 + %11 = select i1 %.not278, i32 0, i32 %10 + %.not279 = icmp eq ptr %5, null + %12 = load i32, ptr %5, align 4 + %13 = select i1 %.not279, i32 0, i32 %12 + br label %L112 + +L42: + %14 = load i32, ptr %2, align 4 + %.not280 = icmp eq i32 %14, 0 + br i1 %.not280, label %L112, label %L47 + +L47: + %15 = load i32, ptr %3, align 4 + %.not282 = icmp eq ptr %4, null + %16 = load i32, ptr %4, align 4 + %17 = select i1 %.not282, i32 0, i32 %16 + %.not283 = icmp eq ptr %5, null + %18 = load i32, ptr %5, align 4 + %19 = select i1 %.not283, i32 0, i32 %18 + br label %L112 + +L112: + %value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ] + %value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ] + %value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ] + %value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ] + store i32 %value_phi10333, ptr %p2, align 4 + store i32 %value_phi11334, ptr %p1, align 4 + store i32 %value_phi12335, ptr %p2, align 4 + store i32 %value_phi13336, ptr %p1, align 4 + ret void +} From d4c41804175e9cb37266c410cafe9caaac1819ca Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Oct 2024 07:17:19 -0700 Subject: [PATCH 172/425] 
[Clang] Add a flag to include GPU startup files (#112025) Summary: The C library for GPUs provides the ability to target regular C/C++ programs by providing the C library and a file containing kernels that call the `main` function. This is mostly used for unit tests; this patch provides a quick way to add them without needing to know the paths. I currently do this explicitly, but according to the libc++ contributors we don't want to need to specify these paths manually. See the discussion in https://github.com/llvm/llvm-project/pull/104515. I just default to `lib/` if the target-specific one isn't found because the linker will handle giving a reasonable error message if it's not found. Basically the use-case looks like this. ```console $ clang test.c --target=amdgcn-amd-amdhsa -mcpu=native -startfiles -stdlib $ amdhsa-loader a.out PASS! ``` --- clang/include/clang/Driver/Options.td | 4 ++++ clang/lib/Driver/ToolChains/AMDGPU.cpp | 11 +++++++++++ clang/lib/Driver/ToolChains/Cuda.cpp | 11 +++++++++++ clang/test/Driver/amdgpu-toolchain.c | 4 ++++ clang/test/Driver/cuda-cross-compiling.c | 8 ++++++++ 5 files changed, 38 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5df6ddd5e6a0c5e..7b28e8b4c31ec1e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5633,6 +5633,7 @@ def noprebind : Flag<["-"], "noprebind">; def noprofilelib : Flag<["-"], "noprofilelib">; def noseglinkedit : Flag<["-"], "noseglinkedit">; def nostartfiles : Flag<["-"], "nostartfiles">, Group; +def startfiles : Flag<["-"], "startfiles">, Group; def nostdinc : Flag<["-"], "nostdinc">, Visibility<[ClangOption, CLOption, DXCOption]>, Group, HelpText<"Disable both standard system #include directories and builtin #include directories">; @@ -5645,6 +5646,9 @@ def nostdincxx : Flag<["-"], "nostdinc++">, Visibility<[ClangOption, CC1Option]> def nostdlib : Flag<["-"], "nostdlib">, 
Visibility<[ClangOption, CLOption, FlangOption, DXCOption]>, Group; +def stdlib : Flag<["-"], "stdlib">, + Visibility<[ClangOption, CLOption, FlangOption, DXCOption]>, + Group; def nostdlibxx : Flag<["-"], "nostdlib++">; def object : Flag<["-"], "object">; def o : JoinedOrSeparate<["-"], "o">, diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 2c85d21ebd738c7..a8061ffd9321f58 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -648,6 +648,17 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ","))); } + if (Args.hasArg(options::OPT_stdlib)) + CmdArgs.append({"-lc", "-lm"}); + if (Args.hasArg(options::OPT_startfiles)) { + std::optional IncludePath = getToolChain().getStdlibPath(); + if (!IncludePath) + IncludePath = "/lib"; + SmallString<128> P(*IncludePath); + llvm::sys::path::append(P, "crt1.o"); + CmdArgs.push_back(Args.MakeArgString(P)); + } + CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); C.addCommand(std::make_unique( diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 412b379304b1e6f..ddd5ea248ca0cca 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -643,6 +643,17 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME); CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath)); + if (Args.hasArg(options::OPT_stdlib)) + CmdArgs.append({"-lc", "-lm"}); + if (Args.hasArg(options::OPT_startfiles)) { + std::optional IncludePath = getToolChain().getStdlibPath(); + if (!IncludePath) + IncludePath = "/lib"; + SmallString<128> P(*IncludePath); + llvm::sys::path::append(P, "crt1.o"); + CmdArgs.push_back(Args.MakeArgString(P)); + } + C.addCommand(std::make_unique( JA, *this, 
ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8, diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c index b60d31bae627009..c1c5aa8e90e6868 100644 --- a/clang/test/Driver/amdgpu-toolchain.c +++ b/clang/test/Driver/amdgpu-toolchain.c @@ -32,3 +32,7 @@ // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \ // RUN: -r %s 2>&1 | FileCheck -check-prefixes=RELO %s // RELO-NOT: -shared + +// RUN: %clang -target amdgcn-amd-amdhsa -march=gfx90a -stdlib -startfiles \ +// RUN: -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=STARTUP %s +// STARTUP: ld.lld{{.*}}"-lc" "-lm" "{{.*}}crt1.o" diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c index 126e9e9fc83d577..baf370483003155 100644 --- a/clang/test/Driver/cuda-cross-compiling.c +++ b/clang/test/Driver/cuda-cross-compiling.c @@ -105,3 +105,11 @@ // RUN: | FileCheck -check-prefix=FEATURE %s // FEATURE: clang-nvlink-wrapper{{.*}}"--plugin-opt=-mattr=+ptx63" + +// +// Test including the libc startup files and libc +// +// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_61 -stdlib -startfiles \ +// RUN: -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=STARTUP %s + +// STARTUP: clang-nvlink-wrapper{{.*}}"-lc" "-lm" "{{.*}}crt1.o" From 42eb54b7743df421af10ebe14b67bb79b46ecabb Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Oct 2024 07:17:50 -0700 Subject: [PATCH 173/425] [Clang] Put offloading globals in the `.llvm.rodata.offloading` section (#111890) Summary: For our offloading entries, we currently store all the string names of kernels that the runtime will need to load from the target executable. These are available via pointer in the `__tgt_offload_entry` struct, however this makes it difficult to obtain from the object itself. This patch simply puts the strings in a named section so they can be easily queried. 
The motivation behind this is that when the linker wrapper is doing linking, it wants to know which kernels the host executable is calling. We *could* get this already via the `.relaomp_offloading_entries` section and trawling through the string table, but that's quite annoying and not portable. The follow-up to this should be to make the linker wrapper get a list of all used symbols the device link job should count as "needed" so we can handle static linking more directly. --- clang/test/CodeGenCUDA/offloading-entries.cu | 65 ++++++++++++++------ llvm/lib/Frontend/Offloading/Utility.cpp | 8 +++ 2 files changed, 53 insertions(+), 20 deletions(-) diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu index ec21f018607ff01..259e3324e8ac94f 100644 --- a/clang/test/CodeGenCUDA/offloading-entries.cu +++ b/clang/test/CodeGenCUDA/offloading-entries.cu @@ -15,48 +15,48 @@ #include "Inputs/cuda.h" //. -// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 // CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 -// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" +// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 // CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 -// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x 
i8] c"var\00", section ".llvm.rodata.offloading", align 1 // CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1 -// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" +// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 // CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1 -// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" +// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 // CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1 //. 
-// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 // HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1 -// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" +// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 // HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1 -// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" +// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 // HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1 -// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" +// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 // HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1 -// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" +// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 // HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, 
i32 1 }, section "hip_offloading_entries", align 1 //. -// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 // CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" +// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 // CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" +// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 // CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" +// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 // CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1 -// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" +// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] 
c"tex\00", section ".llvm.rodata.offloading", align 1 // CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1 //. -// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00" +// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1 // HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00" +// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1 // HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00" +// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1 // HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00" +// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1 // HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1 -// HIP-COFF: 
@.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00" +// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1 // HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1 //. // CUDA-LABEL: @_Z18__device_stub__foov( @@ -137,3 +137,28 @@ template struct __attribute__((device_builtin_texture_type)) texture : public textureReference {}; texture tex; +//. +// CUDA: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name} +// CUDA: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1} +// CUDA: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2} +// CUDA: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3} +// CUDA: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4} +//. +// HIP: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name} +// HIP: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1} +// HIP: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2} +// HIP: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3} +// HIP: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4} +//. +// CUDA-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name} +// CUDA-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1} +// CUDA-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2} +// CUDA-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3} +// CUDA-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4} +//. +// HIP-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name} +// HIP-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1} +// HIP-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2} +// HIP-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3} +// HIP-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4} +//. 
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp index 010c0bfd3be76b3..7a0a7afcfcb5c9d 100644 --- a/llvm/lib/Frontend/Offloading/Utility.cpp +++ b/llvm/lib/Frontend/Offloading/Utility.cpp @@ -53,7 +53,15 @@ offloading::getOffloadingEntryInitializer(Module &M, Constant *Addr, auto *Str = new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true, GlobalValue::InternalLinkage, AddrName, Prefix); + StringRef SectionName = ".llvm.rodata.offloading"; Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + Str->setSection(SectionName); + Str->setAlignment(Align(1)); + + // Make a metadata node for these constants so it can be queried from IR. + NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols"); + Metadata *MDVals[] = {ConstantAsMetadata::get(Str)}; + MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals)); // Construct the offloading entry. Constant *EntryData[] = { From bd6ab32e6eb642f2b0b15be8c7c2a668192f07d8 Mon Sep 17 00:00:00 2001 From: Yusuke MINATO Date: Mon, 28 Oct 2024 23:19:20 +0900 Subject: [PATCH 174/425] Revert "[flang] Integrate the option -flang-experimental-integer-overflow into -fno-wrapv" (#113901) Reverts llvm/llvm-project#110063 due to the performance regression on 503.bwaves_r in SPEC2017. 
--- clang/include/clang/Driver/Options.td | 4 + clang/lib/Driver/ToolChains/Flang.cpp | 1 + flang/include/flang/Lower/LoweringOptions.def | 5 + .../flang/Optimizer/Transforms/Passes.h | 4 +- .../flang/Optimizer/Transforms/Passes.td | 2 +- flang/include/flang/Tools/CrossToolHelpers.h | 2 +- flang/lib/Frontend/CompilerInvocation.cpp | 6 + flang/lib/Frontend/FrontendActions.cpp | 4 +- flang/lib/Lower/Bridge.cpp | 2 +- flang/lib/Lower/IO.cpp | 2 +- flang/lib/Optimizer/Passes/Pipelines.cpp | 6 +- .../Transforms/ControlFlowConverter.cpp | 8 +- flang/test/Driver/frontend-forwarding.f90 | 2 + .../Fir/convert-to-llvm-openmp-and-fir.fir | 10 +- flang/test/Fir/loop01.fir | 230 +++++++++++++++++- flang/test/Fir/loop02.fir | 4 +- flang/test/Lower/HLFIR/goto-do-body.f90 | 4 +- .../OpenMP/parallel-private-clause-fixes.f90 | 4 +- ...oop-reduction-allocatable-array-minmax.f90 | 4 +- flang/test/Lower/OpenMP/wsloop-variable.f90 | 4 +- flang/test/Lower/array-character.f90 | 2 +- .../test/Lower/array-derived-assignments.f90 | 2 +- flang/test/Lower/array-derived.f90 | 2 +- .../array-elemental-calls-char-byval.f90 | 2 +- .../test/Lower/array-elemental-calls-char.f90 | 2 +- .../Lower/array-expression-assumed-size.f90 | 2 +- flang/test/Lower/array-expression-slice-1.f90 | 2 +- flang/test/Lower/array-substring.f90 | 46 +++- flang/test/Lower/array-temp.f90 | 2 +- flang/test/Lower/components.f90 | 2 +- flang/test/Lower/do_loop.f90 | 73 ++++-- flang/test/Lower/do_loop_unstructured.f90 | 203 +++++++++++++++- flang/test/Lower/goto-do-body.f90 | 4 +- flang/test/Lower/host-associated.f90 | 2 +- flang/test/Lower/infinite_loop.f90 | 41 +++- flang/test/Lower/io-implied-do-fixes.f90 | 49 +++- flang/test/Lower/loops2.f90 | 2 +- flang/test/Lower/mixed_loops.f90 | 6 +- flang/test/Lower/vector-subscript-io.f90 | 34 +-- flang/tools/bbc/bbc.cpp | 9 +- 40 files changed, 676 insertions(+), 119 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 
7b28e8b4c31ec1e..f86e90e14c477ba 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6791,6 +6791,10 @@ def flang_deprecated_no_hlfir : Flag<["-"], "flang-deprecated-no-hlfir">, Flags<[HelpHidden]>, Visibility<[FlangOption, FC1Option]>, HelpText<"Do not use HLFIR lowering (deprecated)">; +def flang_experimental_integer_overflow : Flag<["-"], "flang-experimental-integer-overflow">, + Flags<[HelpHidden]>, Visibility<[FlangOption, FC1Option]>, + HelpText<"Add nsw flag to internal operations such as do-variable increment (experimental)">; + //===----------------------------------------------------------------------===// // FLangOption + CoreOption + NoXarchOption //===----------------------------------------------------------------------===// diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 366cadc2e547752..a9d2b7a4dc48f9f 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -148,6 +148,7 @@ void Flang::addCodegenOptions(const ArgList &Args, Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir, options::OPT_flang_deprecated_no_hlfir, + options::OPT_flang_experimental_integer_overflow, options::OPT_fno_ppc_native_vec_elem_order, options::OPT_fppc_native_vec_elem_order}); } diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index 0b22e54b648e94a..231de533fbd30ac 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -38,5 +38,10 @@ ENUM_LOWERINGOPT(Underscoring, unsigned, 1, 1) /// (i.e. wraps around as two's complement). Off by default. ENUM_LOWERINGOPT(IntegerWrapAround, unsigned, 1, 0) +/// If true, add nsw flags to loop variable increments. +/// Off by default. 
+/// TODO: integrate this option with the above +ENUM_LOWERINGOPT(NSWOnLoopVarInc, unsigned, 1, 0) + #undef LOWERINGOPT #undef ENUM_LOWERINGOPT diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index e1453cfa374bfc4..e8f0a8444a31a1e 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -66,7 +66,7 @@ namespace fir { std::unique_ptr createAffineDemotionPass(); std::unique_ptr createArrayValueCopyPass(fir::ArrayValueCopyOptions options = {}); -std::unique_ptr createCFGConversionPassWithoutNSW(); +std::unique_ptr createCFGConversionPassWithNSW(); std::unique_ptr createMemDataFlowOptPass(); std::unique_ptr createPromoteToAffinePass(); std::unique_ptr @@ -83,7 +83,7 @@ createVScaleAttrPass(std::pair vscaleAttr); void populateCfgConversionRewrites(mlir::RewritePatternSet &patterns, bool forceLoopToExecuteOnce = false, - bool setNSW = true); + bool setNSW = false); // declarative passes #define GEN_PASS_REGISTRATION diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index 54b43adbfc6bbf0..a41f0f348f27a65 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -153,7 +153,7 @@ def CFGConversion : Pass<"cfg-conversion"> { /*default=*/"false", "force the body of a loop to execute at least once">, Option<"setNSW", "set-nsw", "bool", - /*default=*/"true", + /*default=*/"false", "set nsw on loop variable increment"> ]; } diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h index 1626970600eff20..df4b21ada058fe3 100644 --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -122,7 +122,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks { bool NoSignedZerosFPMath = false; ///< Set 
no-signed-zeros-fp-math attribute for functions. bool UnsafeFPMath = false; ///< Set unsafe-fp-math attribute for functions. - bool NSWOnLoopVarInc = true; ///< Add nsw flag to loop variable increments. + bool NSWOnLoopVarInc = false; ///< Add nsw flag to loop variable increments. }; struct OffloadModuleOpts { diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 5da5236af2b0e17..94d3d1154178775 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1350,6 +1350,12 @@ bool CompilerInvocation::createFromArgs( invoc.loweringOpts.setNoPPCNativeVecElemOrder(true); } + // -flang-experimental-integer-overflow + if (args.hasArg( + clang::driver::options::OPT_flang_experimental_integer_overflow)) { + invoc.loweringOpts.setNSWOnLoopVarInc(true); + } + // Preserve all the remark options requested, i.e. -Rpass, -Rpass-missed or // -Rpass-analysis. This will be used later when processing and outputting the // remarks generated by LLVM in ExecuteCompilerInvocation.cpp. 
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 3ea242315484fd7..f2e460fc53a67f4 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -828,8 +828,8 @@ void CodeGenAction::generateLLVMIR() { config.VScaleMax = vsr->second; } - if (ci.getInvocation().getLoweringOpts().getIntegerWrapAround()) - config.NSWOnLoopVarInc = false; + if (ci.getInvocation().getLoweringOpts().getNSWOnLoopVarInc()) + config.NSWOnLoopVarInc = true; // Create the pass pipeline fir::createMLIRToLLVMPassPipeline(pm, config, getCurrentFile()); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index a3bd1ace11da213..877fe122265dd0d 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2271,7 +2271,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { assert(!incrementLoopNestInfo.empty() && "empty loop nest"); mlir::Location loc = toLocation(); mlir::arith::IntegerOverflowFlags flags{}; - if (!getLoweringOptions().getIntegerWrapAround()) + if (getLoweringOptions().getNSWOnLoopVarInc()) flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw); auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get( builder->getContext(), flags); diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp index b534c81a605a905..1894b0cfd1bec29 100644 --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -929,7 +929,7 @@ static void genIoLoop(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder = converter.getFirOpBuilder(); mlir::Location loc = converter.getCurrentLocation(); mlir::arith::IntegerOverflowFlags flags{}; - if (!converter.getLoweringOptions().getIntegerWrapAround()) + if (converter.getLoweringOptions().getNSWOnLoopVarInc()) flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw); auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get(builder.getContext(), flags); diff --git 
a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index 091b7c4c164e400..3c139f7e93405ca 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -35,11 +35,11 @@ void addCanonicalizerPassWithoutRegionSimplification(mlir::OpPassManager &pm) { void addCfgConversionPass(mlir::PassManager &pm, const MLIRToLLVMPassPipelineConfig &config) { if (config.NSWOnLoopVarInc) + addNestedPassToAllTopLevelOperationsConditionally( + pm, disableCfgConversion, fir::createCFGConversionPassWithNSW); + else addNestedPassToAllTopLevelOperationsConditionally(pm, disableCfgConversion, fir::createCFGConversion); - else - addNestedPassToAllTopLevelOperationsConditionally( - pm, disableCfgConversion, fir::createCFGConversionPassWithoutNSW); } void addAVC(mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel) { diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp index 411bf7f364a6028..3b79d6d311b71ca 100644 --- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp +++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp @@ -332,6 +332,8 @@ class CfgConversion : public fir::impl::CFGConversionBase { public: using CFGConversionBase::CFGConversionBase; + CfgConversion(bool setNSW) { this->setNSW = setNSW; } + void runOnOperation() override { auto *context = &this->getContext(); mlir::RewritePatternSet patterns(context); @@ -364,8 +366,6 @@ void fir::populateCfgConversionRewrites(mlir::RewritePatternSet &patterns, patterns.getContext(), forceLoopToExecuteOnce, setNSW); } -std::unique_ptr fir::createCFGConversionPassWithoutNSW() { - fir::CFGConversionOptions options; - options.setNSW = false; - return fir::createCFGConversion(options); +std::unique_ptr fir::createCFGConversionPassWithNSW() { + return std::make_unique(true); } diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90 
index 55a74ccf40467b2..ff2d66095214648 100644 --- a/flang/test/Driver/frontend-forwarding.f90 +++ b/flang/test/Driver/frontend-forwarding.f90 @@ -20,6 +20,7 @@ ! RUN: -fversion-loops-for-stride \ ! RUN: -flang-experimental-hlfir \ ! RUN: -flang-deprecated-no-hlfir \ +! RUN: -flang-experimental-integer-overflow \ ! RUN: -fno-ppc-native-vector-element-order \ ! RUN: -fppc-native-vector-element-order \ ! RUN: -mllvm -print-before-all \ @@ -51,6 +52,7 @@ ! CHECK: "-fversion-loops-for-stride" ! CHECK: "-flang-experimental-hlfir" ! CHECK: "-flang-deprecated-no-hlfir" +! CHECK: "-flang-experimental-integer-overflow" ! CHECK: "-fno-ppc-native-vector-element-order" ! CHECK: "-fppc-native-vector-element-order" ! CHECK: "-Rpass" diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 00f8e6e6cc9a6b2..335877e7c9a8725 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -358,10 +358,10 @@ func.func @_QPopenmp_target_data_region() { %9 = arith.subi %8, %c1_i64 : i64 %10 = fir.coordinate_of %0, %9 : (!fir.ref>, i64) -> !fir.ref fir.store %6 to %10 : !fir.ref - %11 = arith.addi %arg0, %c1 overflow : index + %11 = arith.addi %arg0, %c1 : index %12 = fir.convert %c1 : (index) -> i32 %13 = fir.load %1 : !fir.ref - %14 = arith.addi %13, %12 overflow : i32 + %14 = arith.addi %13, %12 : i32 fir.result %11, %14 : index, i32 } fir.store %5#1 to %1 : !fir.ref @@ -404,11 +404,11 @@ func.func @_QPopenmp_target_data_region() { // CHECK: %[[VAL_21:.*]] = llvm.sub %[[VAL_19]], %[[VAL_20]] : i64 // CHECK: %[[VAL_22:.*]] = llvm.getelementptr %[[VAL_1]][0, %[[VAL_21]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: llvm.store %[[VAL_17]], %[[VAL_22]] : i32, !llvm.ptr -// CHECK: %[[VAL_23:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]] overflow : i64 +// CHECK: %[[VAL_23:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]] : i64 // CHECK: %[[VAL_24:.*]] = llvm.trunc %[[VAL_8]] : i64 to i32 // 
CHECK: %[[VAL_25:.*]] = llvm.load %[[VAL_3]] : !llvm.ptr -> i32 -// CHECK: %[[VAL_26:.*]] = llvm.add %[[VAL_25]], %[[VAL_24]] overflow : i32 -// CHECK: %[[VAL_27:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]] overflow : i64 +// CHECK: %[[VAL_26:.*]] = llvm.add %[[VAL_25]], %[[VAL_24]] : i32 +// CHECK: %[[VAL_27:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]] : i64 // CHECK: %[[VAL_28:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[VAL_29:.*]] = llvm.sub %[[VAL_14]], %[[VAL_28]] : i64 // CHECK: llvm.br ^bb1(%[[VAL_27]], %[[VAL_26]], %[[VAL_29]] : i64, i32, i64) diff --git a/flang/test/Fir/loop01.fir b/flang/test/Fir/loop01.fir index 30d10b9bbdb9792..c1cbb522c378c0e 100644 --- a/flang/test/Fir/loop01.fir +++ b/flang/test/Fir/loop01.fir @@ -1,7 +1,5 @@ // RUN: fir-opt --split-input-file --cfg-conversion %s | FileCheck %s -// RUN: fir-opt --split-input-file --cfg-conversion="set-nsw=false" %s | FileCheck %s --check-prefix=NO-NSW - -// NO-NSW-NOT: overflow +// RUN: fir-opt --split-input-file --cfg-conversion="set-nsw=true" %s | FileCheck %s --check-prefix=NSW func.func @x(%lb : index, %ub : index, %step : index, %b : i1, %addr : !fir.ref) { fir.do_loop %iv = %lb to %ub step %step unordered { @@ -37,7 +35,7 @@ func.func private @f2() -> i1 // CHECK: fir.store %[[VAL_12]] to %[[VAL_4]] : !fir.ref // CHECK: br ^bb5 // CHECK: ^bb5: -// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] overflow : index +// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : index // CHECK: %[[VAL_14:.*]] = arith.constant 1 : index // CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index // CHECK: br ^bb1(%[[VAL_13]], %[[VAL_15]] : index, index) @@ -46,6 +44,34 @@ func.func private @f2() -> i1 // CHECK: } // CHECK: func private @f2() -> i1 +// NSW: func @x(%[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index, %[[VAL_3:.*]]: i1, %[[VAL_4:.*]]: !fir.ref) { +// NSW: %[[VAL_5:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index +// NSW: %[[VAL_6:.*]] = arith.addi 
%[[VAL_5]], %[[VAL_2]] : index +// NSW: %[[VAL_7:.*]] = arith.divsi %[[VAL_6]], %[[VAL_2]] : index +// NSW: br ^bb1(%[[VAL_0]], %[[VAL_7]] : index, index) +// NSW: ^bb1(%[[VAL_8:.*]]: index, %[[VAL_9:.*]]: index): +// NSW: %[[VAL_10:.*]] = arith.constant 0 : index +// NSW: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index +// NSW: cond_br %[[VAL_11]], ^bb2, ^bb6 +// NSW: ^bb2: +// NSW: cond_br %[[VAL_3]], ^bb3, ^bb4 +// NSW: ^bb3: +// NSW: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref +// NSW: br ^bb5 +// NSW: ^bb4: +// NSW: %[[VAL_12:.*]] = arith.constant 0 : index +// NSW: fir.store %[[VAL_12]] to %[[VAL_4]] : !fir.ref +// NSW: br ^bb5 +// NSW: ^bb5: +// NSW: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] overflow : index +// NSW: %[[VAL_14:.*]] = arith.constant 1 : index +// NSW: %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index +// NSW: br ^bb1(%[[VAL_13]], %[[VAL_15]] : index, index) +// NSW: ^bb6: +// NSW: return +// NSW: } +// NSW: func private @f2() -> i1 + // ----- func.func @x2(%lo : index, %up : index, %ok : i1) { @@ -75,13 +101,36 @@ func.func private @f3(i16) // CHECK: cond_br %[[VAL_14]], ^bb2, ^bb3 // CHECK: ^bb2: // CHECK: %[[VAL_15:.*]] = fir.call @f2() : () -> i1 -// CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_4]], %[[VAL_3]] overflow : index +// CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_4]], %[[VAL_3]] : index // CHECK: br ^bb1(%[[VAL_16]], %[[VAL_15]] : index, i1) // CHECK: ^bb3: // CHECK: return // CHECK: } // CHECK: func private @f3(i16) +// NSW: func @x2(%[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: i1) { +// NSW: %[[VAL_3:.*]] = arith.constant 1 : index +// NSW: br ^bb1(%[[VAL_0]], %[[VAL_2]] : index, i1) +// NSW: ^bb1(%[[VAL_4:.*]]: index, %[[VAL_5:.*]]: i1): +// NSW: %[[VAL_6:.*]] = arith.constant 0 : index +// NSW: %[[VAL_7:.*]] = arith.cmpi slt, %[[VAL_6]], %[[VAL_3]] : index +// NSW: %[[VAL_8:.*]] = arith.cmpi sle, %[[VAL_4]], %[[VAL_1]] : index +// NSW: %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_3]], 
%[[VAL_6]] : index +// NSW: %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_4]] : index +// NSW: %[[VAL_11:.*]] = arith.andi %[[VAL_7]], %[[VAL_8]] : i1 +// NSW: %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1 +// NSW: %[[VAL_13:.*]] = arith.ori %[[VAL_11]], %[[VAL_12]] : i1 +// NSW: %[[VAL_14:.*]] = arith.andi %[[VAL_5]], %[[VAL_13]] : i1 +// NSW: cond_br %[[VAL_14]], ^bb2, ^bb3 +// NSW: ^bb2: +// NSW: %[[VAL_15:.*]] = fir.call @f2() : () -> i1 +// NSW: %[[VAL_16:.*]] = arith.addi %[[VAL_4]], %[[VAL_3]] overflow : index +// NSW: br ^bb1(%[[VAL_16]], %[[VAL_15]] : index, i1) +// NSW: ^bb3: +// NSW: return +// NSW: } +// NSW: func private @f3(i16) + // ----- // do_loop with an extra loop-carried value @@ -110,7 +159,7 @@ func.func @x3(%lo : index, %up : index) -> i1 { // CHECK: cond_br %[[VAL_11]], ^bb2, ^bb3 // CHECK: ^bb2: // CHECK: %[[VAL_12:.*]] = fir.call @f2() : () -> i1 -// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow : index +// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] : index // CHECK: %[[VAL_14:.*]] = arith.constant 1 : index // CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index // CHECK: br ^bb1(%[[VAL_13]], %[[VAL_12]], %[[VAL_15]] : index, i1, index) @@ -118,6 +167,29 @@ func.func @x3(%lo : index, %up : index) -> i1 { // CHECK: return %[[VAL_8]] : i1 // CHECK: } +// NSW-LABEL: func @x3( +// NSW-SAME: %[[VAL_0:.*]]: index, +// NSW-SAME: %[[VAL_1:.*]]: index) -> i1 { +// NSW: %[[VAL_2:.*]] = arith.constant 1 : index +// NSW: %[[VAL_3:.*]] = arith.constant true +// NSW: %[[VAL_4:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index +// NSW: %[[VAL_5:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] : index +// NSW: %[[VAL_6:.*]] = arith.divsi %[[VAL_5]], %[[VAL_2]] : index +// NSW: br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_6]] : index, i1, index) +// NSW: ^bb1(%[[VAL_7:.*]]: index, %[[VAL_8:.*]]: i1, %[[VAL_9:.*]]: index): +// NSW: %[[VAL_10:.*]] = arith.constant 0 : index +// NSW: %[[VAL_11:.*]] = 
arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index +// NSW: cond_br %[[VAL_11]], ^bb2, ^bb3 +// NSW: ^bb2: +// NSW: %[[VAL_12:.*]] = fir.call @f2() : () -> i1 +// NSW: %[[VAL_13:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow : index +// NSW: %[[VAL_14:.*]] = arith.constant 1 : index +// NSW: %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index +// NSW: br ^bb1(%[[VAL_13]], %[[VAL_12]], %[[VAL_15]] : index, i1, index) +// NSW: ^bb3: +// NSW: return %[[VAL_8]] : i1 +// NSW: } + // ----- // iterate_while with an extra loop-carried value @@ -155,7 +227,7 @@ func.func private @f4(i32) -> i1 // CHECK: cond_br %[[VAL_16]], ^bb2, ^bb3 // CHECK: ^bb2: // CHECK: %[[VAL_17:.*]] = fir.call @f2() : () -> i1 -// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow : index +// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] : index // CHECK: br ^bb1(%[[VAL_18]], %[[VAL_6]], %[[VAL_17]] : index, i1, i1) // CHECK: ^bb3: // CHECK: %[[VAL_19:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1 @@ -163,6 +235,34 @@ func.func private @f4(i32) -> i1 // CHECK: } // CHECK: func private @f4(i32) -> i1 +// NSW-LABEL: func @y3( +// NSW-SAME: %[[VAL_0:.*]]: index, +// NSW-SAME: %[[VAL_1:.*]]: index) -> i1 { +// NSW: %[[VAL_2:.*]] = arith.constant 1 : index +// NSW: %[[VAL_3:.*]] = arith.constant true +// NSW: %[[VAL_4:.*]] = fir.call @f2() : () -> i1 +// NSW: br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_4]] : index, i1, i1) +// NSW: ^bb1(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: i1, %[[VAL_7:.*]]: i1): +// NSW: %[[VAL_8:.*]] = arith.constant 0 : index +// NSW: %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_8]], %[[VAL_2]] : index +// NSW: %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_5]], %[[VAL_1]] : index +// NSW: %[[VAL_11:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_8]] : index +// NSW: %[[VAL_12:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_5]] : index +// NSW: %[[VAL_13:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1 +// NSW: %[[VAL_14:.*]] = arith.andi %[[VAL_11]], %[[VAL_12]] : i1 +// NSW: 
%[[VAL_15:.*]] = arith.ori %[[VAL_13]], %[[VAL_14]] : i1 +// NSW: %[[VAL_16:.*]] = arith.andi %[[VAL_6]], %[[VAL_15]] : i1 +// NSW: cond_br %[[VAL_16]], ^bb2, ^bb3 +// NSW: ^bb2: +// NSW: %[[VAL_17:.*]] = fir.call @f2() : () -> i1 +// NSW: %[[VAL_18:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow : index +// NSW: br ^bb1(%[[VAL_18]], %[[VAL_6]], %[[VAL_17]] : index, i1, i1) +// NSW: ^bb3: +// NSW: %[[VAL_19:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1 +// NSW: return %[[VAL_19]] : i1 +// NSW: } +// NSW: func private @f4(i32) -> i1 + // ----- // do_loop that returns the final value of the induction @@ -191,7 +291,7 @@ func.func @x4(%lo : index, %up : index) -> index { // CHECK: ^bb2: // CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_6]] : (index) -> i32 // CHECK: %[[VAL_11:.*]] = fir.call @f4(%[[VAL_10]]) : (i32) -> i1 -// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] overflow : index +// CHECK: %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] : index // CHECK: %[[VAL_13:.*]] = arith.constant 1 : index // CHECK: %[[VAL_14:.*]] = arith.subi %[[VAL_7]], %[[VAL_13]] : index // CHECK: br ^bb1(%[[VAL_12]], %[[VAL_14]] : index, index) @@ -199,6 +299,29 @@ func.func @x4(%lo : index, %up : index) -> index { // CHECK: return %[[VAL_6]] : index // CHECK: } +// NSW-LABEL: func @x4( +// NSW-SAME: %[[VAL_0:.*]]: index, +// NSW-SAME: %[[VAL_1:.*]]: index) -> index { +// NSW: %[[VAL_2:.*]] = arith.constant 1 : index +// NSW: %[[VAL_3:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index +// NSW: %[[VAL_4:.*]] = arith.addi %[[VAL_3]], %[[VAL_2]] : index +// NSW: %[[VAL_5:.*]] = arith.divsi %[[VAL_4]], %[[VAL_2]] : index +// NSW: br ^bb1(%[[VAL_0]], %[[VAL_5]] : index, index) +// NSW: ^bb1(%[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index): +// NSW: %[[VAL_8:.*]] = arith.constant 0 : index +// NSW: %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_7]], %[[VAL_8]] : index +// NSW: cond_br %[[VAL_9]], ^bb2, ^bb3 +// NSW: ^bb2: +// NSW: %[[VAL_10:.*]] = fir.convert %[[VAL_6]] : (index) -> i32 
+// NSW: %[[VAL_11:.*]] = fir.call @f4(%[[VAL_10]]) : (i32) -> i1 +// NSW: %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] overflow : index +// NSW: %[[VAL_13:.*]] = arith.constant 1 : index +// NSW: %[[VAL_14:.*]] = arith.subi %[[VAL_7]], %[[VAL_13]] : index +// NSW: br ^bb1(%[[VAL_12]], %[[VAL_14]] : index, index) +// NSW: ^bb3: +// NSW: return %[[VAL_6]] : index +// NSW: } + // ----- // iterate_while that returns the final value of both inductions @@ -233,12 +356,38 @@ func.func @y4(%lo : index, %up : index) -> index { // CHECK: ^bb2: // CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_4]] : (index) -> i32 // CHECK: %[[VAL_16:.*]] = fir.call @f4(%[[VAL_15]]) : (i32) -> i1 -// CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] overflow : index +// CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] : index // CHECK: br ^bb1(%[[VAL_17]], %[[VAL_16]] : index, i1) // CHECK: ^bb3: // CHECK: return %[[VAL_4]] : index // CHECK: } +// NSW-LABEL: func @y4( +// NSW-SAME: %[[VAL_0:.*]]: index, +// NSW-SAME: %[[VAL_1:.*]]: index) -> index { +// NSW: %[[VAL_2:.*]] = arith.constant 1 : index +// NSW: %[[VAL_3:.*]] = arith.constant true +// NSW: br ^bb1(%[[VAL_0]], %[[VAL_3]] : index, i1) +// NSW: ^bb1(%[[VAL_4:.*]]: index, %[[VAL_5:.*]]: i1): +// NSW: %[[VAL_6:.*]] = arith.constant 0 : index +// NSW: %[[VAL_7:.*]] = arith.cmpi slt, %[[VAL_6]], %[[VAL_2]] : index +// NSW: %[[VAL_8:.*]] = arith.cmpi sle, %[[VAL_4]], %[[VAL_1]] : index +// NSW: %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_6]] : index +// NSW: %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_4]] : index +// NSW: %[[VAL_11:.*]] = arith.andi %[[VAL_7]], %[[VAL_8]] : i1 +// NSW: %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1 +// NSW: %[[VAL_13:.*]] = arith.ori %[[VAL_11]], %[[VAL_12]] : i1 +// NSW: %[[VAL_14:.*]] = arith.andi %[[VAL_5]], %[[VAL_13]] : i1 +// NSW: cond_br %[[VAL_14]], ^bb2, ^bb3 +// NSW: ^bb2: +// NSW: %[[VAL_15:.*]] = fir.convert %[[VAL_4]] : (index) -> i32 +// NSW: 
%[[VAL_16:.*]] = fir.call @f4(%[[VAL_15]]) : (i32) -> i1 +// NSW: %[[VAL_17:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] overflow : index +// NSW: br ^bb1(%[[VAL_17]], %[[VAL_16]] : index, i1) +// NSW: ^bb3: +// NSW: return %[[VAL_4]] : index +// NSW: } + // ----- // do_loop that returns the final induction value @@ -271,7 +420,7 @@ func.func @x5(%lo : index, %up : index) -> index { // CHECK: ^bb2: // CHECK: %[[VAL_12:.*]] = fir.call @f2() : () -> i1 // CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> i16 -// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow : index +// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] : index // CHECK: %[[VAL_15:.*]] = arith.constant 1 : index // CHECK: %[[VAL_16:.*]] = arith.subi %[[VAL_9]], %[[VAL_15]] : index // CHECK: br ^bb1(%[[VAL_14]], %[[VAL_13]], %[[VAL_16]] : index, i16, index) @@ -280,6 +429,31 @@ func.func @x5(%lo : index, %up : index) -> index { // CHECK: return %[[VAL_7]] : index // CHECK: } +// NSW-LABEL: func @x5( +// NSW-SAME: %[[VAL_0:.*]]: index, +// NSW-SAME: %[[VAL_1:.*]]: index) -> index { +// NSW: %[[VAL_2:.*]] = arith.constant 1 : index +// NSW: %[[VAL_3:.*]] = arith.constant 42 : i16 +// NSW: %[[VAL_4:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index +// NSW: %[[VAL_5:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] : index +// NSW: %[[VAL_6:.*]] = arith.divsi %[[VAL_5]], %[[VAL_2]] : index +// NSW: br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_6]] : index, i16, index) +// NSW: ^bb1(%[[VAL_7:.*]]: index, %[[VAL_8:.*]]: i16, %[[VAL_9:.*]]: index): +// NSW: %[[VAL_10:.*]] = arith.constant 0 : index +// NSW: %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index +// NSW: cond_br %[[VAL_11]], ^bb2, ^bb3 +// NSW: ^bb2: +// NSW: %[[VAL_12:.*]] = fir.call @f2() : () -> i1 +// NSW: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> i16 +// NSW: %[[VAL_14:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow : index +// NSW: %[[VAL_15:.*]] = arith.constant 1 : index +// NSW: %[[VAL_16:.*]] = 
arith.subi %[[VAL_9]], %[[VAL_15]] : index +// NSW: br ^bb1(%[[VAL_14]], %[[VAL_13]], %[[VAL_16]] : index, i16, index) +// NSW: ^bb3: +// NSW: fir.call @f3(%[[VAL_8]]) : (i16) -> () +// NSW: return %[[VAL_7]] : index +// NSW: } + // ----- // iterate_while that returns the both induction values @@ -322,7 +496,7 @@ func.func @y5(%lo : index, %up : index) -> index { // CHECK: ^bb2: // CHECK: %[[VAL_17:.*]] = fir.call @f2() : () -> i1 // CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i1) -> i16 -// CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow : index +// CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] : index // CHECK: br ^bb1(%[[VAL_19]], %[[VAL_17]], %[[VAL_18]] : index, i1, i16) // CHECK: ^bb3: // CHECK: cond_br %[[VAL_6]], ^bb4, ^bb5 @@ -334,3 +508,37 @@ func.func @y5(%lo : index, %up : index) -> index { // CHECK: fir.call @f3(%[[VAL_7]]) : (i16) -> () // CHECK: return %[[VAL_5]] : index // CHECK: } + +// NSW-LABEL: func @y5( +// NSW-SAME: %[[VAL_0:.*]]: index, +// NSW-SAME: %[[VAL_1:.*]]: index) -> index { +// NSW: %[[VAL_2:.*]] = arith.constant 1 : index +// NSW: %[[VAL_3:.*]] = arith.constant 42 : i16 +// NSW: %[[VAL_4:.*]] = arith.constant true +// NSW: br ^bb1(%[[VAL_0]], %[[VAL_4]], %[[VAL_3]] : index, i1, i16) +// NSW: ^bb1(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: i1, %[[VAL_7:.*]]: i16): +// NSW: %[[VAL_8:.*]] = arith.constant 0 : index +// NSW: %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_8]], %[[VAL_2]] : index +// NSW: %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_5]], %[[VAL_1]] : index +// NSW: %[[VAL_11:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_8]] : index +// NSW: %[[VAL_12:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_5]] : index +// NSW: %[[VAL_13:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1 +// NSW: %[[VAL_14:.*]] = arith.andi %[[VAL_11]], %[[VAL_12]] : i1 +// NSW: %[[VAL_15:.*]] = arith.ori %[[VAL_13]], %[[VAL_14]] : i1 +// NSW: %[[VAL_16:.*]] = arith.andi %[[VAL_6]], %[[VAL_15]] : i1 +// NSW: cond_br %[[VAL_16]], ^bb2, ^bb3 +// 
NSW: ^bb2: +// NSW: %[[VAL_17:.*]] = fir.call @f2() : () -> i1 +// NSW: %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i1) -> i16 +// NSW: %[[VAL_19:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow : index +// NSW: br ^bb1(%[[VAL_19]], %[[VAL_17]], %[[VAL_18]] : index, i1, i16) +// NSW: ^bb3: +// NSW: cond_br %[[VAL_6]], ^bb4, ^bb5 +// NSW: ^bb4: +// NSW: %[[VAL_20:.*]] = arith.constant 0 : i32 +// NSW: %[[VAL_21:.*]] = fir.call @f4(%[[VAL_20]]) : (i32) -> i1 +// NSW: br ^bb5 +// NSW: ^bb5: +// NSW: fir.call @f3(%[[VAL_7]]) : (i16) -> () +// NSW: return %[[VAL_5]] : index +// NSW: } diff --git a/flang/test/Fir/loop02.fir b/flang/test/Fir/loop02.fir index fb209a9dfeb4269..50948e0e7aa6b53 100644 --- a/flang/test/Fir/loop02.fir +++ b/flang/test/Fir/loop02.fir @@ -31,7 +31,7 @@ func.func private @y(%addr : !fir.ref) // CHECK: cond_br %[[VAL_13]], ^bb2, ^bb3 // CHECK: ^bb2: // CHECK: fir.call @y(%[[VAL_0]]) : (!fir.ref) -> () -// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_10]], %[[VAL_2]] overflow : index +// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_10]], %[[VAL_2]] : index // CHECK: %[[VAL_15:.*]] = arith.constant 1 : index // CHECK: %[[VAL_16:.*]] = arith.subi %[[VAL_11]], %[[VAL_15]] : index // CHECK: br ^bb1(%[[VAL_14]], %[[VAL_16]] : index, index) @@ -54,7 +54,7 @@ func.func private @y(%addr : !fir.ref) // NOOPT: cond_br %[[VAL_9]], ^bb2, ^bb3 // NOOPT: ^bb2: // NOOPT: fir.call @y(%[[VAL_0]]) : (!fir.ref) -> () -// NOOPT: %[[VAL_10:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] overflow : index +// NOOPT: %[[VAL_10:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] : index // NOOPT: %[[VAL_11:.*]] = arith.constant 1 : index // NOOPT: %[[VAL_12:.*]] = arith.subi %[[VAL_7]], %[[VAL_11]] : index // NOOPT: br ^bb1(%[[VAL_10]], %[[VAL_12]] : index, index) diff --git a/flang/test/Lower/HLFIR/goto-do-body.f90 b/flang/test/Lower/HLFIR/goto-do-body.f90 index 5f5b09ccb8f7dcc..383b839e591e33d 100644 --- a/flang/test/Lower/HLFIR/goto-do-body.f90 +++ b/flang/test/Lower/HLFIR/goto-do-body.f90 @@ 
-40,7 +40,7 @@ subroutine sub1() ! CHECK: %[[TMP5:.*]] = arith.subi %[[TMP4]], %[[C1]] : i32 ! CHECK: fir.store %[[TMP5]] to %[[TRIP]] : !fir.ref ! CHECK: %[[TMP6:.*]] = fir.load %[[I]]#1 : !fir.ref -! CHECK: %[[TMP7:.*]] = arith.addi %[[TMP6]], %[[C1]] overflow : i32 +! CHECK: %[[TMP7:.*]] = arith.addi %[[TMP6]], %[[C1]] : i32 ! CHECK: fir.store %[[TMP7]] to %[[I]]#1 : !fir.ref ! CHECK: cf.br ^[[HEADER]] end do @@ -104,7 +104,7 @@ subroutine sub2() ! CHECK: fir.store %[[TMP9]] to %[[TRIP]] : !fir.ref ! CHECK: %[[TMP10:.*]] = fir.load %[[I]]#1 : !fir.ref ! CHECK: %[[STEP_VAL:.*]] = fir.load %[[STEP_VAR]] : !fir.ref -! CHECK: %[[TMP11:.*]] = arith.addi %[[TMP10]], %[[STEP_VAL]] overflow : i32 +! CHECK: %[[TMP11:.*]] = arith.addi %[[TMP10]], %[[STEP_VAL]] : i32 ! CHECK: fir.store %[[TMP11]] to %[[I]]#1 : !fir.ref ! CHECK: cf.br ^[[HEADER]] end do diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 index 99323e69113bcc6..5e76e8ff1663bf7 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 @@ -82,10 +82,10 @@ ! CHECK: %[[VAL_15:.*]] = fir.load %[[PRIV_J_DECL]]#0 : !fir.ref ! CHECK: %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32 ! CHECK: hlfir.assign %[[VAL_16]] to %[[PRIV_X_DECL]]#0 : i32, !fir.ref -! CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] overflow : index +! CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index ! CHECK: %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32 ! CHECK: %[[IVLOAD:.*]] = fir.load %[[PRIV_J_DECL]]#1 : !fir.ref -! CHECK: %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] overflow : +! CHECK: %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] ! CHECK: fir.result %[[VAL_17]], %[[IVINC]] : index, i32 ! CHECK: } ! 
CHECK: fir.store %[[VAL_12]]#1 to %[[PRIV_J_DECL]]#1 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 index ce45d09d77a22a1..a49eba69ff38cce 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 @@ -206,10 +206,10 @@ program reduce15 ! CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_47]] : (i32) -> i64 ! CHECK: %[[VAL_49:.*]] = hlfir.designate %[[VAL_46]] (%[[VAL_48]]) : (!fir.box>>, i64) -> !fir.ref ! CHECK: hlfir.assign %[[VAL_45]] to %[[VAL_49]] : i32, !fir.ref -! CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] overflow : index +! CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] : index ! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_40]] : (index) -> i32 ! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref -! CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_51]] overflow : i32 +! CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_51]] : i32 ! CHECK: fir.result %[[VAL_50]], %[[VAL_53]] : index, i32 ! CHECK: } ! 
CHECK: fir.store %[[VAL_54:.*]]#1 to %[[VAL_3]]#1 : !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index cc77ce754d97e9c..8d235c10fa1d602 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -150,10 +150,10 @@ subroutine wsloop_variable_sub !CHECK: %[[VAL_42:.*]] = arith.addi %[[VAL_40]], %[[VAL_41]] : i64 !CHECK: %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i64) -> f32 !CHECK: hlfir.assign %[[VAL_43]] to %[[VAL_21]]#0 : f32, !fir.ref -!CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_37]], %[[VAL_34]] overflow : index +!CHECK: %[[VAL_44:.*]] = arith.addi %[[VAL_37]], %[[VAL_34]] : index !CHECK: %[[VAL_45:.*]] = fir.convert %[[VAL_34]] : (index) -> i64 !CHECK: %[[VAL_46:.*]] = fir.load %[[VAL_17]]#1 : !fir.ref -!CHECK: %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_45]] overflow : i64 +!CHECK: %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_45]] : i64 !CHECK: fir.result %[[VAL_44]], %[[VAL_47]] : index, i64 !CHECK: } !CHECK: fir.store %[[VAL_48:.*]]#1 to %[[VAL_17]]#1 : !fir.ref diff --git a/flang/test/Lower/array-character.f90 b/flang/test/Lower/array-character.f90 index 53adc5c02958c33..c93ef4be30823cd 100644 --- a/flang/test/Lower/array-character.f90 +++ b/flang/test/Lower/array-character.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -hlfir=false -fwrapv %s -o - | fir-opt --canonicalize --cse | FileCheck %s +! RUN: bbc -hlfir=false %s -o - | fir-opt --canonicalize --cse | FileCheck %s ! CHECK-LABEL: func @_QPissue( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.boxchar<1>{{.*}}, %[[VAL_1:.*]]: !fir.boxchar<1>{{.*}}) { diff --git a/flang/test/Lower/array-derived-assignments.f90 b/flang/test/Lower/array-derived-assignments.f90 index f4e51271d593689..71e61f651302a37 100644 --- a/flang/test/Lower/array-derived-assignments.f90 +++ b/flang/test/Lower/array-derived-assignments.f90 @@ -1,5 +1,5 @@ ! Test derived type assignment lowering inside array expression -! 
RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s +! RUN: bbc -hlfir=false %s -o - | FileCheck %s module array_derived_assign type simple_copy diff --git a/flang/test/Lower/array-derived.f90 b/flang/test/Lower/array-derived.f90 index a0c55f5d88255f8..b5eb7621c90f109 100644 --- a/flang/test/Lower/array-derived.f90 +++ b/flang/test/Lower/array-derived.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s +! RUN: bbc -hlfir=false %s -o - | FileCheck %s module cs type r diff --git a/flang/test/Lower/array-elemental-calls-char-byval.f90 b/flang/test/Lower/array-elemental-calls-char-byval.f90 index 682191fc7895620..c321614e7fc5b53 100644 --- a/flang/test/Lower/array-elemental-calls-char-byval.f90 +++ b/flang/test/Lower/array-elemental-calls-char-byval.f90 @@ -1,6 +1,6 @@ ! Test lowering of elemental calls with character argument ! with the VALUE attribute. -! RUN: bbc -hlfir=false -fwrapv -o - %s | FileCheck %s +! RUN: bbc -hlfir=false -o - %s | FileCheck %s module char_elem_byval diff --git a/flang/test/Lower/array-elemental-calls-char.f90 b/flang/test/Lower/array-elemental-calls-char.f90 index 00e2f8e8f9c1270..603cc677805fc9c 100644 --- a/flang/test/Lower/array-elemental-calls-char.f90 +++ b/flang/test/Lower/array-elemental-calls-char.f90 @@ -1,6 +1,6 @@ ! Test lowering of elemental calls with character argument ! without the VALUE attribute. -! RUN: bbc -hlfir=false -fwrapv -o - %s | FileCheck %s +! RUN: bbc -hlfir=false -o - %s | FileCheck %s module char_elem diff --git a/flang/test/Lower/array-expression-assumed-size.f90 b/flang/test/Lower/array-expression-assumed-size.f90 index 2fbf315aff11485..ae35da951538b8c 100644 --- a/flang/test/Lower/array-expression-assumed-size.f90 +++ b/flang/test/Lower/array-expression-assumed-size.f90 @@ -1,5 +1,5 @@ ! RUN: bbc --emit-fir -hlfir=false %s -o - | FileCheck %s -! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck --check-prefix=PostOpt %s +! 
RUN: bbc -hlfir=false %s -o - | FileCheck --check-prefix=PostOpt %s subroutine assumed_size_test(a) diff --git a/flang/test/Lower/array-expression-slice-1.f90 b/flang/test/Lower/array-expression-slice-1.f90 index b597814bc0d9f14..152450902432901 100644 --- a/flang/test/Lower/array-expression-slice-1.f90 +++ b/flang/test/Lower/array-expression-slice-1.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -hlfir=false -fwrapv -o - --outline-intrinsics %s | FileCheck %s +! RUN: bbc -hlfir=false -o - --outline-intrinsics %s | FileCheck %s ! CHECK-LABEL: func @_QQmain() attributes {fir.bindc_name = "p"} { ! CHECK-DAG: %[[VAL_0:.*]] = arith.constant 10 : index diff --git a/flang/test/Lower/array-substring.f90 b/flang/test/Lower/array-substring.f90 index 02101039120e9fc..2e283997e3e003a 100644 --- a/flang/test/Lower/array-substring.f90 +++ b/flang/test/Lower/array-substring.f90 @@ -1,7 +1,5 @@ ! RUN: bbc -hlfir=false %s -o - | FileCheck %s -! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s --check-prefix=NO-NSW - -! NO-NSW-NOT: overflow +! RUN: bbc -hlfir=false -integer-overflow %s -o - | FileCheck %s --check-prefix=NSW ! CHECK-LABEL: func @_QPtest( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.boxchar<1>{{.*}}) -> !fir.array<1x!fir.logical<4>> { @@ -34,9 +32,8 @@ ! CHECK: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> ! CHECK: %[[VAL_27:.*]] = fir.array_coor %[[VAL_8]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> ! CHECK: fir.store %[[VAL_26]] to %[[VAL_27]] : !fir.ref> -! CHECK: %[[VAL_15_NSW:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] overflow : index ! CHECK: %[[VAL_28:.*]] = arith.subi %[[VAL_13]], %[[VAL_1]] : index -! CHECK: br ^bb1(%[[VAL_15_NSW]], %[[VAL_28]] : index, index) +! CHECK: br ^bb1(%[[VAL_15]], %[[VAL_28]] : index, index) ! CHECK: ^bb3: ! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_8]] : !fir.ref>> ! 
CHECK: return %[[VAL_29]] : !fir.array<1x!fir.logical<4>> @@ -49,3 +46,42 @@ function test(C) test = C(1:1)(1:8) == (/'ABCDabcd'/) end function test + +! NSW-LABEL: func @_QPtest( +! NSW-SAME: %[[VAL_0:.*]]: !fir.boxchar<1>{{.*}}) -> !fir.array<1x!fir.logical<4>> { +! NSW-DAG: %[[VAL_1:.*]] = arith.constant 1 : index +! NSW-DAG: %[[VAL_2:.*]] = arith.constant 0 : index +! NSW-DAG: %[[VAL_3:.*]] = arith.constant 0 : i32 +! NSW-DAG: %[[VAL_4:.*]] = arith.constant 8 : index +! NSW: %[[VAL_6:.*]]:2 = fir.unboxchar %[[VAL_0]] : (!fir.boxchar<1>) -> (!fir.ref>, index) +! NSW: %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref>) -> !fir.ref>> +! NSW: %[[VAL_8:.*]] = fir.alloca !fir.array<1x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFtestEtest"} +! NSW: %[[VAL_9:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +! NSW: %[[VAL_10:.*]] = fir.slice %[[VAL_1]], %[[VAL_1]], %[[VAL_1]] : (index, index, index) -> !fir.slice<1> +! NSW: %[[VAL_11:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref>> +! NSW: br ^bb1(%[[VAL_2]], %[[VAL_1]] : index, index) +! NSW: ^bb1(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index): +! NSW: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_2]] : index +! NSW: cond_br %[[VAL_14]], ^bb2, ^bb3 +! NSW: ^bb2: +! NSW: %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] : index +! NSW: %[[VAL_16:.*]] = fir.array_coor %[[VAL_7]](%[[VAL_9]]) {{\[}}%[[VAL_10]]] %[[VAL_15]] : (!fir.ref>>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref> +! NSW: %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (!fir.ref>) -> !fir.ref>> +! NSW: %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_17]], %[[VAL_2]] : (!fir.ref>>, index) -> !fir.ref> +! NSW: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (!fir.ref>) -> !fir.ref> +! NSW: %[[VAL_20:.*]] = fir.array_coor %[[VAL_11]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> +! NSW: %[[VAL_21:.*]] = fir.convert %[[VAL_19]] : (!fir.ref>) -> !fir.ref +! 
NSW: %[[VAL_22:.*]] = fir.convert %[[VAL_20]] : (!fir.ref>) -> !fir.ref +! NSW: %[[VAL_23:.*]] = fir.convert %[[VAL_4]] : (index) -> i64 +! NSW: %[[VAL_24:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_21]], %[[VAL_22]], %[[VAL_23]], %[[VAL_23]]) {{.*}}: (!fir.ref, !fir.ref, i64, i64) -> i32 +! NSW: %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_24]], %[[VAL_3]] : i32 +! NSW: %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4> +! NSW: %[[VAL_27:.*]] = fir.array_coor %[[VAL_8]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref>>, !fir.shape<1>, index) -> !fir.ref> +! NSW: fir.store %[[VAL_26]] to %[[VAL_27]] : !fir.ref> +! NSW: %[[VAL_15_NSW:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] overflow : index +! NSW: %[[VAL_28:.*]] = arith.subi %[[VAL_13]], %[[VAL_1]] : index +! NSW: br ^bb1(%[[VAL_15_NSW]], %[[VAL_28]] : index, index) +! NSW: ^bb3: +! NSW: %[[VAL_29:.*]] = fir.load %[[VAL_8]] : !fir.ref>> +! NSW: return %[[VAL_29]] : !fir.array<1x!fir.logical<4>> +! NSW: } diff --git a/flang/test/Lower/array-temp.f90 b/flang/test/Lower/array-temp.f90 index 718aef84a4e853c..10c5ee91d44bdae 100644 --- a/flang/test/Lower/array-temp.f90 +++ b/flang/test/Lower/array-temp.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s +! RUN: bbc -hlfir=false %s -o - | FileCheck %s ! CHECK-LABEL: func @_QPss1() subroutine ss1 diff --git a/flang/test/Lower/components.f90 b/flang/test/Lower/components.f90 index 28e836c5d10456a..e1582a8a31e0d4f 100644 --- a/flang/test/Lower/components.f90 +++ b/flang/test/Lower/components.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s +! RUN: bbc -hlfir=false %s -o - | FileCheck %s module components_test type t1 diff --git a/flang/test/Lower/do_loop.f90 b/flang/test/Lower/do_loop.f90 index 5d8343b8d68a45a..a46e6c947391b79 100644 --- a/flang/test/Lower/do_loop.f90 +++ b/flang/test/Lower/do_loop.f90 @@ -1,17 +1,17 @@ ! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false -o - %s | FileCheck %s ! 
RUN: %flang_fc1 -mllvm --use-desc-for-alloc=false -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s -! RUN: %flang_fc1 -mllvm --use-desc-for-alloc=false -emit-fir -flang-deprecated-no-hlfir -fwrapv -o - %s | FileCheck %s --check-prefix=NO-NSW +! RUN: %flang_fc1 -mllvm --use-desc-for-alloc=false -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW ! Simple tests for structured ordered loops with loop-control. ! Tests the structure of the loop, storage to index variable and return and ! storage of the final value of the index variable. -! NO-NSW-NOT: overflow - ! Test a simple loop with the final value of the index variable read outside the loop ! CHECK-LABEL: simple_loop +! NSW-LABEL: simple_loop subroutine simple_loop ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_loopEi"} + ! NSW: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_loopEi"} integer :: i ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 @@ -21,14 +21,18 @@ subroutine simple_loop ! CHECK: %[[C1:.*]] = arith.constant 1 : index ! CHECK: %[[LB:.*]] = fir.convert %[[C1_CVT]] : (index) -> i32 ! CHECK: %[[LI_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = + ! NSW: %[[LI_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = ! CHECK-SAME: %[[C1_CVT]] to %[[C5_CVT]] step %[[C1]] ! CHECK-SAME: iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) { do i=1,5 ! CHECK: fir.store %[[IV]] to %[[I_REF]] : !fir.ref - ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[C1]] overflow : index + ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[C1]] : index + ! NSW: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[C1:.*]] overflow : index ! CHECK: %[[STEPCAST:.*]] = fir.convert %[[C1]] : (index) -> i32 ! CHECK: %[[IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref - ! CHECK: %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] overflow : i32 + ! NSW: %[[IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref + ! 
CHECK: %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] : i32 + ! NSW: %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST:.*]] overflow : i32 ! CHECK: fir.result %[[LI_NEXT]], %[[IVINC]] : index, i32 ! CHECK: } end do @@ -40,11 +44,14 @@ subroutine simple_loop ! Test a 2-nested loop with a body composed of a reduction. Values are read from a 2d array. ! CHECK-LABEL: nested_loop +! NSW-LABEL: nested_loop subroutine nested_loop ! CHECK: %[[ARR_REF:.*]] = fir.alloca !fir.array<5x5xi32> {bindc_name = "arr", uniq_name = "_QFnested_loopEarr"} ! CHECK: %[[ASUM_REF:.*]] = fir.alloca i32 {bindc_name = "asum", uniq_name = "_QFnested_loopEasum"} ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_loopEi"} + ! NSW: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_loopEi"} ! CHECK: %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_loopEj"} + ! NSW: %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_loopEj"} integer :: asum, arr(5,5) integer :: i, j asum = 0 @@ -55,6 +62,7 @@ subroutine nested_loop ! CHECK: %[[ST_I:.*]] = arith.constant 1 : index ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_I_CVT]] : (index) -> i32 ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = + ! NSW: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = ! CHECK-SAME: %[[S_I_CVT]] to %[[E_I_CVT]] step %[[ST_I]] ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) { do i=1,5 @@ -66,6 +74,7 @@ subroutine nested_loop ! CHECK: %[[ST_J:.*]] = arith.constant 1 : index ! CHECK: %[[J_LB:.*]] = fir.convert %[[S_J_CVT]] : (index) -> i32 ! CHECK: %[[J_RES:.*]]:2 = fir.do_loop %[[LJ:[^ ]*]] = + ! NSW: %[[J_RES:.*]]:2 = fir.do_loop %[[LJ:[^ ]*]] = ! CHECK-SAME: %[[S_J_CVT]] to %[[E_J_CVT]] step %[[ST_J]] ! CHECK-SAME: iter_args(%[[J_IV:.*]] = %[[J_LB]]) -> (index, i32) { do j=1,5 @@ -84,18 +93,24 @@ subroutine nested_loop ! CHECK: %[[ASUM_NEW:.*]] = arith.addi %[[ASUM]], %[[ARR_VAL]] : i32 ! 
CHECK: fir.store %[[ASUM_NEW]] to %[[ASUM_REF]] : !fir.ref asum = asum + arr(i,j) - ! CHECK: %[[LJ_NEXT:.*]] = arith.addi %[[LJ]], %[[ST_J]] overflow : index + ! CHECK: %[[LJ_NEXT:.*]] = arith.addi %[[LJ]], %[[ST_J]] : index + ! NSW: %[[LJ_NEXT:.*]] = arith.addi %[[LJ]], %[[ST_J:.*]] overflow : index ! CHECK: %[[J_STEPCAST:.*]] = fir.convert %[[ST_J]] : (index) -> i32 ! CHECK: %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref - ! CHECK: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] overflow : i32 + ! NSW: %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref + ! CHECK: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] : i32 + ! NSW: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST:.*]] overflow : i32 ! CHECK: fir.result %[[LJ_NEXT]], %[[J_IVINC]] : index, i32 ! CHECK: } end do ! CHECK: fir.store %[[J_RES]]#1 to %[[J_REF]] : !fir.ref - ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_I]] overflow : index + ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_I]] : index + ! NSW: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_I:.*]] overflow : index ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_I]] : (index) -> i32 ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref - ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow : i32 + ! NSW: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32 + ! NSW: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow : i32 ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32 ! CHECK: } end do @@ -104,9 +119,11 @@ subroutine nested_loop ! Test a downcounting loop ! CHECK-LABEL: down_counting_loop +! NSW-LABEL: down_counting_loop subroutine down_counting_loop() integer :: i ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdown_counting_loopEi"} + ! NSW: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdown_counting_loopEi"} ! 
CHECK: %[[C5:.*]] = arith.constant 5 : i32 ! CHECK: %[[C5_CVT:.*]] = fir.convert %[[C5]] : (i32) -> index @@ -116,14 +133,18 @@ subroutine down_counting_loop() ! CHECK: %[[CMINUS1_STEP_CVT:.*]] = fir.convert %[[CMINUS1]] : (i32) -> index ! CHECK: %[[I_LB:.*]] = fir.convert %[[C5_CVT]] : (index) -> i32 ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = + ! NSW: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = ! CHECK-SAME: %[[C5_CVT]] to %[[C1_CVT]] step %[[CMINUS1_STEP_CVT]] ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) { do i=5,1,-1 ! CHECK: fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref - ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[CMINUS1_STEP_CVT]] overflow : index + ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[CMINUS1_STEP_CVT]] : index + ! NSW: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[CMINUS1_STEP_CVT:.*]] overflow : index ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[CMINUS1_STEP_CVT]] : (index) -> i32 ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref - ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow : i32 + ! NSW: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32 + ! NSW: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow : i32 ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32 ! CHECK: } end do @@ -132,6 +153,7 @@ subroutine down_counting_loop() ! Test a general loop with a variable step ! CHECK-LABEL: loop_with_variable_step +! NSW-LABEL: loop_with_variable_step ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref {fir.bindc_name = "s"}, %[[E_REF:.*]]: !fir.ref {fir.bindc_name = "e"}, %[[ST_REF:.*]]: !fir.ref {fir.bindc_name = "st"}) { subroutine loop_with_variable_step(s,e,st) integer :: s, e, st @@ -144,14 +166,18 @@ subroutine loop_with_variable_step(s,e,st) ! CHECK: %[[ST_CVT:.*]] = fir.convert %[[ST]] : (i32) -> index ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i32 ! 
CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = + ! NSW: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]] ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) { do i=s,e,st ! CHECK: fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref - ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] overflow : index + ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index + ! NSW: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow : index ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i32 ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref - ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow : i32 + ! NSW: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32 + ! NSW: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow : i32 ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32 ! CHECK: } end do @@ -160,11 +186,13 @@ subroutine loop_with_variable_step(s,e,st) ! Test usage of pointer variables as index, start, end and step variables ! CHECK-LABEL: loop_with_pointer_variables +! NSW-LABEL: loop_with_pointer_variables ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref {fir.bindc_name = "s", fir.target}, %[[E_REF:.*]]: !fir.ref {fir.bindc_name = "e", fir.target}, %[[ST_REF:.*]]: !fir.ref {fir.bindc_name = "st", fir.target}) { subroutine loop_with_pointer_variables(s,e,st) ! CHECK: %[[E_PTR_REF:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFloop_with_pointer_variablesEeptr.addr"} ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", fir.target, uniq_name = "_QFloop_with_pointer_variablesEi"} ! CHECK: %[[I_PTR_REF:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFloop_with_pointer_variablesEiptr.addr"} +! NSW: %[[I_PTR_REF:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFloop_with_pointer_variablesEiptr.addr"} ! 
CHECK: %[[S_PTR_REF:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFloop_with_pointer_variablesEsptr.addr"} ! CHECK: %[[ST_PTR_REF:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFloop_with_pointer_variablesEstptr.addr"} integer, target :: i @@ -185,6 +213,7 @@ subroutine loop_with_pointer_variables(s,e,st) stptr => st ! CHECK: %[[I_PTR:.*]] = fir.load %[[I_PTR_REF]] : !fir.ref> +! NSW: %[[I_PTR:.*]] = fir.load %[[I_PTR_REF]] : !fir.ref> ! CHECK: %[[S_PTR:.*]] = fir.load %[[S_PTR_REF]] : !fir.ref> ! CHECK: %[[S:.*]] = fir.load %[[S_PTR]] : !fir.ptr ! CHECK: %[[S_CVT:.*]] = fir.convert %[[S]] : (i32) -> index @@ -196,14 +225,18 @@ subroutine loop_with_pointer_variables(s,e,st) ! CHECK: %[[ST_CVT:.*]] = fir.convert %[[ST]] : (i32) -> index ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i32 ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = +! NSW: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]] ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) { do iptr=sptr,eptr,stptr ! CHECK: fir.store %[[I_IV]] to %[[I_PTR]] : !fir.ptr -! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] overflow : index +! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index +! NSW: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow : index ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i32 ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_PTR]] : !fir.ptr -! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow : i32 +! NSW: %[[I_IVLOAD:.*]] = fir.load %[[I_PTR]] : !fir.ptr +! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32 +! NSW: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow : i32 ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32 end do ! CHECK: } @@ -212,9 +245,11 @@ subroutine loop_with_pointer_variables(s,e,st) ! 
Test usage of non-default integer kind for loop control and loop index variable ! CHECK-LABEL: loop_with_non_default_integer +! NSW-LABEL: loop_with_non_default_integer ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref {fir.bindc_name = "s"}, %[[E_REF:.*]]: !fir.ref {fir.bindc_name = "e"}, %[[ST_REF:.*]]: !fir.ref {fir.bindc_name = "st"}) { subroutine loop_with_non_default_integer(s,e,st) ! CHECK: %[[I_REF:.*]] = fir.alloca i64 {bindc_name = "i", uniq_name = "_QFloop_with_non_default_integerEi"} + ! NSW: %[[I_REF:.*]] = fir.alloca i64 {bindc_name = "i", uniq_name = "_QFloop_with_non_default_integerEi"} integer(kind=8):: i ! CHECK: %[[S:.*]] = fir.load %[[S_REF]] : !fir.ref ! CHECK: %[[S_CVT:.*]] = fir.convert %[[S]] : (i64) -> index @@ -226,14 +261,18 @@ subroutine loop_with_non_default_integer(s,e,st) ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i64 ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = + ! NSW: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] = ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]] ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i64) { do i=s,e,st ! CHECK: fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref - ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] overflow : index + ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index + ! NSW: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow : index ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i64 ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref - ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow : i64 + ! NSW: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref + ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i64 + ! NSW: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow : i64 ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i64 end do ! 
CHECK: } diff --git a/flang/test/Lower/do_loop_unstructured.f90 b/flang/test/Lower/do_loop_unstructured.f90 index d8890b2d0926ead..e1a669e09c9a895 100644 --- a/flang/test/Lower/do_loop_unstructured.f90 +++ b/flang/test/Lower/do_loop_unstructured.f90 @@ -1,11 +1,9 @@ ! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s -! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fwrapv -o - %s | FileCheck %s --check-prefix=NO-NSW +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW ! Tests for unstructured loops. -! NO-NSW-NOT: overflow - ! Test a simple unstructured loop. Test for the existence of, ! -> The initialization of the trip-count and loop-variable ! -> The branch to the body or the exit inside the header @@ -41,12 +39,42 @@ subroutine simple_unstructured() ! CHECK: fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref ! CHECK: %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref ! CHECK: %[[STEP_ONE_2:.*]] = arith.constant 1 : i32 -! CHECK: %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_ONE_2]] overflow : i32 +! CHECK: %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_ONE_2]] : i32 ! CHECK: fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref ! CHECK: cf.br ^[[HEADER]] ! CHECK: ^[[EXIT]]: ! CHECK: return +! NSW-LABEL: simple_unstructured +! NSW: %[[TRIP_VAR_REF:.*]] = fir.alloca i32 +! NSW: %[[LOOP_VAR_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_unstructuredEi"} +! NSW: %[[ONE:.*]] = arith.constant 1 : i32 +! NSW: %[[HUNDRED:.*]] = arith.constant 100 : i32 +! NSW: %[[STEP_ONE:.*]] = arith.constant 1 : i32 +! NSW: %[[TMP1:.*]] = arith.subi %[[HUNDRED]], %[[ONE]] : i32 +! NSW: %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[STEP_ONE]] : i32 +! NSW: %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[STEP_ONE]] : i32 +! 
NSW: fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_REF]] : !fir.ref +! NSW: fir.store %[[ONE]] to %[[LOOP_VAR_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER:.*]] +! NSW: ^[[HEADER]]: +! NSW: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref +! NSW: %[[ZERO:.*]] = arith.constant 0 : i32 +! NSW: %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32 +! NSW: cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]] +! NSW: ^[[BODY]]: +! NSW: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref +! NSW: %[[ONE_1:.*]] = arith.constant 1 : i32 +! NSW: %[[TRIP_VAR_NEXT:.*]] = arith.subi %[[TRIP_VAR]], %[[ONE_1]] : i32 +! NSW: fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref +! NSW: %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref +! NSW: %[[STEP_ONE_2:.*]] = arith.constant 1 : i32 +! NSW: %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_ONE_2]] overflow : i32 +! NSW: fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER]] +! NSW: ^[[EXIT]]: +! NSW: return + ! Test an unstructured loop with a step. Mostly similar to the previous one. ! Only difference is a non-unit step. subroutine simple_unstructured_with_step() @@ -80,12 +108,42 @@ subroutine simple_unstructured_with_step() ! CHECK: fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref ! CHECK: %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref ! CHECK: %[[STEP_2:.*]] = arith.constant 2 : i32 -! CHECK: %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_2]] overflow : i32 +! CHECK: %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_2]] : i32 ! CHECK: fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref ! CHECK: cf.br ^[[HEADER]] ! CHECK: ^[[EXIT]]: ! CHECK: return +! NSW-LABEL: simple_unstructured_with_step +! NSW: %[[TRIP_VAR_REF:.*]] = fir.alloca i32 +! NSW: %[[LOOP_VAR_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_unstructured_with_stepEi"} +! NSW: %[[ONE:.*]] = arith.constant 1 : i32 +! 
NSW: %[[HUNDRED:.*]] = arith.constant 100 : i32 +! NSW: %[[STEP:.*]] = arith.constant 2 : i32 +! NSW: %[[TMP1:.*]] = arith.subi %[[HUNDRED]], %[[ONE]] : i32 +! NSW: %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[STEP]] : i32 +! NSW: %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[STEP]] : i32 +! NSW: fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_REF]] : !fir.ref +! NSW: fir.store %[[ONE]] to %[[LOOP_VAR_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER:.*]] +! NSW: ^[[HEADER]]: +! NSW: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref +! NSW: %[[ZERO:.*]] = arith.constant 0 : i32 +! NSW: %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32 +! NSW: cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]] +! NSW: ^[[BODY]]: +! NSW: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref +! NSW: %[[ONE_1:.*]] = arith.constant 1 : i32 +! NSW: %[[TRIP_VAR_NEXT:.*]] = arith.subi %[[TRIP_VAR]], %[[ONE_1]] : i32 +! NSW: fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref +! NSW: %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref +! NSW: %[[STEP_2:.*]] = arith.constant 2 : i32 +! NSW: %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_2]] overflow : i32 +! NSW: fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER]] +! NSW: ^[[EXIT]]: +! NSW: return + ! Test a three nested unstructured loop. Three nesting is the basic case where ! we have loops that are neither innermost or outermost. subroutine nested_unstructured() @@ -157,7 +215,7 @@ subroutine nested_unstructured() ! CHECK: fir.store %[[TRIP_VAR_K_NEXT]] to %[[TRIP_VAR_K_REF]] : !fir.ref ! CHECK: %[[LOOP_VAR_K:.*]] = fir.load %[[LOOP_VAR_K_REF]] : !fir.ref ! CHECK: %[[K_STEP_2:.*]] = arith.constant 1 : i32 -! CHECK: %[[LOOP_VAR_K_NEXT:.*]] = arith.addi %[[LOOP_VAR_K]], %[[K_STEP_2]] overflow : i32 +! CHECK: %[[LOOP_VAR_K_NEXT:.*]] = arith.addi %[[LOOP_VAR_K]], %[[K_STEP_2]] : i32 ! CHECK: fir.store %[[LOOP_VAR_K_NEXT]] to %[[LOOP_VAR_K_REF]] : !fir.ref ! 
CHECK: cf.br ^[[HEADER_K]] ! CHECK: ^[[EXIT_K]]: @@ -167,7 +225,7 @@ subroutine nested_unstructured() ! CHECK: fir.store %[[TRIP_VAR_J_NEXT]] to %[[TRIP_VAR_J_REF]] : !fir.ref ! CHECK: %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref ! CHECK: %[[J_STEP_2:.*]] = arith.constant 1 : i32 -! CHECK: %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %[[J_STEP_2]] overflow : i32 +! CHECK: %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %[[J_STEP_2]] : i32 ! CHECK: fir.store %[[LOOP_VAR_J_NEXT]] to %[[LOOP_VAR_J_REF]] : !fir.ref ! CHECK: cf.br ^[[HEADER_J]] ! CHECK: ^[[EXIT_J]]: @@ -177,12 +235,96 @@ subroutine nested_unstructured() ! CHECK: fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref ! CHECK: %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref ! CHECK: %[[I_STEP_2:.*]] = arith.constant 1 : i32 -! CHECK: %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow : i32 +! CHECK: %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] : i32 ! CHECK: fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref ! CHECK: cf.br ^[[HEADER_I]] ! CHECK: ^[[EXIT_I]]: ! CHECK: return +! NSW-LABEL: nested_unstructured +! NSW: %[[TRIP_VAR_K_REF:.*]] = fir.alloca i32 +! NSW: %[[TRIP_VAR_J_REF:.*]] = fir.alloca i32 +! NSW: %[[TRIP_VAR_I_REF:.*]] = fir.alloca i32 +! NSW: %[[LOOP_VAR_I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_unstructuredEi"} +! NSW: %[[LOOP_VAR_J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_unstructuredEj"} +! NSW: %[[LOOP_VAR_K_REF:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFnested_unstructuredEk"} +! NSW: %[[I_START:.*]] = arith.constant 1 : i32 +! NSW: %[[I_END:.*]] = arith.constant 100 : i32 +! NSW: %[[I_STEP:.*]] = arith.constant 1 : i32 +! NSW: %[[TMP1:.*]] = arith.subi %[[I_END]], %[[I_START]] : i32 +! NSW: %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[I_STEP]] : i32 +! 
NSW: %[[TRIP_COUNT_I:.*]] = arith.divsi %[[TMP2]], %[[I_STEP]] : i32 +! NSW: fir.store %[[TRIP_COUNT_I]] to %[[TRIP_VAR_I_REF]] : !fir.ref +! NSW: fir.store %[[I_START]] to %[[LOOP_VAR_I_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER_I:.*]] +! NSW: ^[[HEADER_I]]: +! NSW: %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref +! NSW: %[[ZERO_1:.*]] = arith.constant 0 : i32 +! NSW: %[[COND_I:.*]] = arith.cmpi sgt, %[[TRIP_VAR_I]], %[[ZERO_1]] : i32 +! NSW: cf.cond_br %[[COND_I]], ^[[BODY_I:.*]], ^[[EXIT_I:.*]] +! NSW: ^[[BODY_I]]: +! NSW: %[[J_START:.*]] = arith.constant 1 : i32 +! NSW: %[[J_END:.*]] = arith.constant 200 : i32 +! NSW: %[[J_STEP:.*]] = arith.constant 1 : i32 +! NSW: %[[TMP3:.*]] = arith.subi %[[J_END]], %[[J_START]] : i32 +! NSW: %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[J_STEP]] : i32 +! NSW: %[[TRIP_COUNT_J:.*]] = arith.divsi %[[TMP4]], %[[J_STEP]] : i32 +! NSW: fir.store %[[TRIP_COUNT_J]] to %[[TRIP_VAR_J_REF]] : !fir.ref +! NSW: fir.store %[[J_START]] to %[[LOOP_VAR_J_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER_J:.*]] +! NSW: ^[[HEADER_J]]: +! NSW: %[[TRIP_VAR_J:.*]] = fir.load %[[TRIP_VAR_J_REF]] : !fir.ref +! NSW: %[[ZERO_2:.*]] = arith.constant 0 : i32 +! NSW: %[[COND_J:.*]] = arith.cmpi sgt, %[[TRIP_VAR_J]], %[[ZERO_2]] : i32 +! NSW: cf.cond_br %[[COND_J]], ^[[BODY_J:.*]], ^[[EXIT_J:.*]] +! NSW: ^[[BODY_J]]: +! NSW: %[[K_START:.*]] = arith.constant 1 : i32 +! NSW: %[[K_END:.*]] = arith.constant 300 : i32 +! NSW: %[[K_STEP:.*]] = arith.constant 1 : i32 +! NSW: %[[TMP3:.*]] = arith.subi %[[K_END]], %[[K_START]] : i32 +! NSW: %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[K_STEP]] : i32 +! NSW: %[[TRIP_COUNT_K:.*]] = arith.divsi %[[TMP4]], %[[K_STEP]] : i32 +! NSW: fir.store %[[TRIP_COUNT_K]] to %[[TRIP_VAR_K_REF]] : !fir.ref +! NSW: fir.store %[[K_START]] to %[[LOOP_VAR_K_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER_K:.*]] +! NSW: ^[[HEADER_K]]: +! NSW: %[[TRIP_VAR_K:.*]] = fir.load %[[TRIP_VAR_K_REF]] : !fir.ref +! 
NSW: %[[ZERO_2:.*]] = arith.constant 0 : i32 +! NSW: %[[COND_K:.*]] = arith.cmpi sgt, %[[TRIP_VAR_K]], %[[ZERO_2]] : i32 +! NSW: cf.cond_br %[[COND_K]], ^[[BODY_K:.*]], ^[[EXIT_K:.*]] +! NSW: ^[[BODY_K]]: +! NSW: %[[TRIP_VAR_K:.*]] = fir.load %[[TRIP_VAR_K_REF]] : !fir.ref +! NSW: %[[ONE_1:.*]] = arith.constant 1 : i32 +! NSW: %[[TRIP_VAR_K_NEXT:.*]] = arith.subi %[[TRIP_VAR_K]], %[[ONE_1]] : i32 +! NSW: fir.store %[[TRIP_VAR_K_NEXT]] to %[[TRIP_VAR_K_REF]] : !fir.ref +! NSW: %[[LOOP_VAR_K:.*]] = fir.load %[[LOOP_VAR_K_REF]] : !fir.ref +! NSW: %[[K_STEP_2:.*]] = arith.constant 1 : i32 +! NSW: %[[LOOP_VAR_K_NEXT:.*]] = arith.addi %[[LOOP_VAR_K]], %[[K_STEP_2]] overflow : i32 +! NSW: fir.store %[[LOOP_VAR_K_NEXT]] to %[[LOOP_VAR_K_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER_K]] +! NSW: ^[[EXIT_K]]: +! NSW: %[[TRIP_VAR_J:.*]] = fir.load %[[TRIP_VAR_J_REF]] : !fir.ref +! NSW: %[[ONE_1:.*]] = arith.constant 1 : i32 +! NSW: %[[TRIP_VAR_J_NEXT:.*]] = arith.subi %[[TRIP_VAR_J]], %[[ONE_1]] : i32 +! NSW: fir.store %[[TRIP_VAR_J_NEXT]] to %[[TRIP_VAR_J_REF]] : !fir.ref +! NSW: %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref +! NSW: %[[J_STEP_2:.*]] = arith.constant 1 : i32 +! NSW: %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %[[J_STEP_2]] overflow : i32 +! NSW: fir.store %[[LOOP_VAR_J_NEXT]] to %[[LOOP_VAR_J_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER_J]] +! NSW: ^[[EXIT_J]]: +! NSW: %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref +! NSW: %[[ONE_1:.*]] = arith.constant 1 : i32 +! NSW: %[[TRIP_VAR_I_NEXT:.*]] = arith.subi %[[TRIP_VAR_I]], %[[ONE_1]] : i32 +! NSW: fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref +! NSW: %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref +! NSW: %[[I_STEP_2:.*]] = arith.constant 1 : i32 +! NSW: %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow : i32 +! NSW: fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER_I]] +! 
NSW: ^[[EXIT_I]]: +! NSW: return + ! Test the existence of a structured loop inside an unstructured loop. ! Only minimal checks are inserted for the structured loop. subroutine nested_structured_in_unstructured() @@ -217,9 +359,9 @@ subroutine nested_structured_in_unstructured() ! CHECK-SAME: %{{.*}} to %{{.*}} step %[[ST:[^ ]*]] ! CHECK-SAME: iter_args(%[[J_IV:.*]] = %{{.*}}) -> (index, i32) { ! CHECK: fir.store %[[J_IV]] to %[[LOOP_VAR_J_REF]] : !fir.ref -! CHECK: %[[J_INDEX_NEXT:.*]] = arith.addi %[[J_INDEX]], %[[ST]] overflow : index +! CHECK: %[[J_INDEX_NEXT:.*]] = arith.addi %[[J_INDEX]], %[[ST]] : index ! CHECK: %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref -! CHECK: %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %{{[^ ]*}} overflow : i32 +! CHECK: %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %{{[^ ]*}} : i32 ! CHECK: } ! CHECK: %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref ! CHECK: %[[C1_3:.*]] = arith.constant 1 : i32 @@ -227,8 +369,47 @@ subroutine nested_structured_in_unstructured() ! CHECK: fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref ! CHECK: %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref ! CHECK: %[[I_STEP_2:.*]] = arith.constant 1 : i32 -! CHECK: %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow : i32 +! CHECK: %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] : i32 ! CHECK: fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref ! CHECK: cf.br ^[[HEADER]] ! CHECK: ^[[EXIT]]: ! CHECK: return + +! NSW-LABEL: nested_structured_in_unstructured +! NSW: %[[TRIP_VAR_I_REF:.*]] = fir.alloca i32 +! NSW: %[[LOOP_VAR_I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_structured_in_unstructuredEi"} +! NSW: %[[LOOP_VAR_J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_structured_in_unstructuredEj"} +! NSW: %[[I_START:.*]] = arith.constant 1 : i32 +! 
NSW: %[[I_END:.*]] = arith.constant 100 : i32 +! NSW: %[[I_STEP:.*]] = arith.constant 1 : i32 +! NSW: %[[TMP1:.*]] = arith.subi %[[I_END]], %[[I_START]] : i32 +! NSW: %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[I_STEP]] : i32 +! NSW: %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[I_STEP]] : i32 +! NSW: fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_I_REF]] : !fir.ref +! NSW: fir.store %[[I_START]] to %[[LOOP_VAR_I_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER:.*]] +! NSW: ^[[HEADER]]: +! NSW: %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref +! NSW: %[[ZERO:.*]] = arith.constant 0 : i32 +! NSW: %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32 +! NSW: cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]] +! NSW: ^[[BODY]]: +! NSW: %{{.*}} = fir.do_loop %[[J_INDEX:[^ ]*]] = +! NSW-SAME: %{{.*}} to %{{.*}} step %[[ST:[^ ]*]] +! NSW-SAME: iter_args(%[[J_IV:.*]] = %{{.*}}) -> (index, i32) { +! NSW: fir.store %[[J_IV]] to %[[LOOP_VAR_J_REF]] : !fir.ref +! NSW: %[[J_INDEX_NEXT:.*]] = arith.addi %[[J_INDEX]], %[[ST]] overflow : index +! NSW: %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref +! NSW: %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %{{[^ ]*}} overflow : i32 +! NSW: } +! NSW: %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref +! NSW: %[[C1_3:.*]] = arith.constant 1 : i32 +! NSW: %[[TRIP_VAR_I_NEXT:.*]] = arith.subi %[[TRIP_VAR_I]], %[[C1_3]] : i32 +! NSW: fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref +! NSW: %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref +! NSW: %[[I_STEP_2:.*]] = arith.constant 1 : i32 +! NSW: %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow : i32 +! NSW: fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref +! NSW: cf.br ^[[HEADER]] +! NSW: ^[[EXIT]]: +! 
NSW: return diff --git a/flang/test/Lower/goto-do-body.f90 b/flang/test/Lower/goto-do-body.f90 index 89e4a7a64a87bac..910e55f1839fd2a 100644 --- a/flang/test/Lower/goto-do-body.f90 +++ b/flang/test/Lower/goto-do-body.f90 @@ -48,7 +48,7 @@ subroutine sub1() ! CHECK: fir.store %[[TMP8]] to %[[TRIP]] : !fir.ref ! CHECK: %[[TMP9:.*]] = fir.load %[[I]] : !fir.ref ! CHECK: %[[C1_4:.*]] = arith.constant 1 : i32 -! CHECK: %[[TMP10:.*]] = arith.addi %[[TMP9]], %[[C1_4]] overflow : i32 +! CHECK: %[[TMP10:.*]] = arith.addi %[[TMP9]], %[[C1_4]] : i32 ! CHECK: fir.store %[[TMP10]] to %[[I]] : !fir.ref ! CHECK: cf.br ^[[HEADER]] end do @@ -115,7 +115,7 @@ subroutine sub2() ! CHECK: fir.store %[[TMP10]] to %[[TRIP]] : !fir.ref ! CHECK: %[[TMP11:.*]] = fir.load %[[I]] : !fir.ref ! CHECK: %[[STEP_VAL:.*]] = fir.load %[[STEP_VAR]] : !fir.ref -! CHECK: %[[TMP12:.*]] = arith.addi %[[TMP11]], %[[STEP_VAL]] overflow : i32 +! CHECK: %[[TMP12:.*]] = arith.addi %[[TMP11]], %[[STEP_VAL]] : i32 ! CHECK: fir.store %[[TMP12]] to %[[I]] : !fir.ref ! CHECK: cf.br ^[[HEADER]] end do diff --git a/flang/test/Lower/host-associated.f90 b/flang/test/Lower/host-associated.f90 index 33acdff1bb74cf6..9b4269df7bfcb67 100644 --- a/flang/test/Lower/host-associated.f90 +++ b/flang/test/Lower/host-associated.f90 @@ -1,5 +1,5 @@ ! Test internal procedure host association lowering. -! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s +! RUN: bbc -hlfir=false %s -o - | FileCheck %s ! ----------------------------------------------------------------------------- ! Test non character intrinsic scalars diff --git a/flang/test/Lower/infinite_loop.f90 b/flang/test/Lower/infinite_loop.f90 index de0bee779c5b623..6942dda8d7a23a1 100644 --- a/flang/test/Lower/infinite_loop.f90 +++ b/flang/test/Lower/infinite_loop.f90 @@ -1,11 +1,9 @@ ! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s -! 
RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fwrapv -o - %s | FileCheck %s --check-prefix=NO-NSW +! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW ! Tests for infinite loop. -! NO-NSW-NOT: overflow - subroutine empty_infinite() do end do @@ -98,10 +96,10 @@ subroutine structured_loop_in_infinite(i) ! CHECK-SAME: %[[C1_INDEX]] to %[[C10_INDEX]] step %[[C1_1]] ! CHECK-SAME: iter_args(%[[J_IV:.*]] = %[[J_LB]]) -> (index, i32) { ! CHECK: fir.store %[[J_IV]] to %[[J_REF]] : !fir.ref -! CHECK: %[[J_NEXT:.*]] = arith.addi %[[J]], %[[C1_1]] overflow : index +! CHECK: %[[J_NEXT:.*]] = arith.addi %[[J]], %[[C1_1]] : index ! CHECK: %[[J_STEPCAST:.*]] = fir.convert %[[C1_1]] : (index) -> i32 ! CHECK: %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref -! CHECK: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] overflow : i32 +! CHECK: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] : i32 ! CHECK: fir.result %[[J_NEXT]], %[[J_IVINC]] : index, i32 ! CHECK: } ! CHECK: fir.store %[[J_FINAL]]#1 to %[[J_REF]] : !fir.ref @@ -109,6 +107,39 @@ subroutine structured_loop_in_infinite(i) ! CHECK: ^[[RETURN]]: ! CHECK: return +! NSW-LABEL: structured_loop_in_infinite +! NSW-SAME: %[[I_REF:.*]]: !fir.ref +! NSW: %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFstructured_loop_in_infiniteEj"} +! NSW: cf.br ^[[BODY1:.*]] +! NSW: ^[[BODY1]]: +! NSW: %[[I:.*]] = fir.load %[[I_REF]] : !fir.ref +! NSW: %[[C100:.*]] = arith.constant 100 : i32 +! NSW: %[[COND:.*]] = arith.cmpi sgt, %[[I]], %[[C100]] : i32 +! NSW: cf.cond_br %[[COND]], ^[[EXIT:.*]], ^[[BODY2:.*]] +! NSW: ^[[EXIT]]: +! NSW: cf.br ^[[RETURN:.*]] +! NSW: ^[[BODY2:.*]]: +! NSW: %[[C1:.*]] = arith.constant 1 : i32 +! NSW: %[[C1_INDEX:.*]] = fir.convert %[[C1]] : (i32) -> index +! NSW: %[[C10:.*]] = arith.constant 10 : i32 +! NSW: %[[C10_INDEX:.*]] = fir.convert %[[C10]] : (i32) -> index +! 
NSW: %[[C1_1:.*]] = arith.constant 1 : index +! NSW: %[[J_LB:.*]] = fir.convert %[[C1_INDEX]] : (index) -> i32 +! NSW: %[[J_FINAL:.*]]:2 = fir.do_loop %[[J:[^ ]*]] = +! NSW-SAME: %[[C1_INDEX]] to %[[C10_INDEX]] step %[[C1_1]] +! NSW-SAME: iter_args(%[[J_IV:.*]] = %[[J_LB]]) -> (index, i32) { +! NSW: fir.store %[[J_IV]] to %[[J_REF]] : !fir.ref +! NSW: %[[J_NEXT:.*]] = arith.addi %[[J]], %[[C1_1]] overflow : index +! NSW: %[[J_STEPCAST:.*]] = fir.convert %[[C1_1]] : (index) -> i32 +! NSW: %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref +! NSW: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] overflow : i32 +! NSW: fir.result %[[J_NEXT]], %[[J_IVINC]] : index, i32 +! NSW: } +! NSW: fir.store %[[J_FINAL]]#1 to %[[J_REF]] : !fir.ref +! NSW: cf.br ^[[BODY1]] +! NSW: ^[[RETURN]]: +! NSW: return + subroutine empty_infinite_in_while(i) integer :: i do while (i .gt. 50) diff --git a/flang/test/Lower/io-implied-do-fixes.f90 b/flang/test/Lower/io-implied-do-fixes.f90 index cd4fd43e051946e..a6c115fa80ded17 100644 --- a/flang/test/Lower/io-implied-do-fixes.f90 +++ b/flang/test/Lower/io-implied-do-fixes.f90 @@ -1,20 +1,30 @@ ! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false %s -o - | FileCheck %s -! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false -fwrapv %s -o - | FileCheck %s --check-prefix=NO-NSW +! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false -integer-overflow %s -o - | FileCheck %s --check-prefix=NSW ! UNSUPPORTED: system-windows -! NO-NSW-NOT: overflow - ! CHECK-LABEL: func @_QPido1 ! CHECK: %[[J_REF_ADDR:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFido1Eiptr.addr"} ! CHECK: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref> ! CHECK: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index { ! CHECK: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32 ! CHECK: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.ptr -! 
CHECK: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow : index +! CHECK: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index ! CHECK: fir.result %[[J_VAL_NEXT]] : index ! CHECK: } ! CHECK: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32 ! CHECK: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.ptr + +! NSW-LABEL: func @_QPido1 +! NSW: %[[J_REF_ADDR:.*]] = fir.alloca !fir.ptr {uniq_name = "_QFido1Eiptr.addr"} +! NSW: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref> +! NSW: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index { +! NSW: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32 +! NSW: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.ptr +! NSW: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow : index +! NSW: fir.result %[[J_VAL_NEXT]] : index +! NSW: } +! NSW: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32 +! NSW: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.ptr subroutine ido1 integer, pointer :: iptr integer, target :: itgt @@ -28,11 +38,23 @@ subroutine ido1 ! CHECK: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index { ! CHECK: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32 ! CHECK: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap -! CHECK: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow : index +! CHECK: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index ! CHECK: fir.result %[[J_VAL_NEXT]] : index ! CHECK: } ! CHECK: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32 ! CHECK: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap + +! NSW-LABEL: func @_QPido2 +! NSW: %[[J_REF_ADDR:.*]] = fir.alloca !fir.heap {uniq_name = "_QFido2Eiptr.addr"} +! NSW: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref> +! NSW: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index { +! 
NSW: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32 +! NSW: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap +! NSW: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow : index +! NSW: fir.result %[[J_VAL_NEXT]] : index +! NSW: } +! NSW: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32 +! NSW: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap subroutine ido2 integer, allocatable :: iptr allocate(iptr) @@ -47,12 +69,27 @@ subroutine ido2 ! CHECK: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap ! CHECK: %[[RES:.*]] = fir.if %[[OK]] -> (i1) { ! CHECK: } -! CHECK: %[[J_VAL_INC:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow : index +! CHECK: %[[J_VAL_INC:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index ! CHECK: %[[J_VAL_NEXT:.*]] = arith.select %[[RES]], %[[J_VAL_INC]], %[[J_VAL]] : index ! CHECK: fir.result %[[J_VAL_NEXT]], %[[RES]] : index, i1 ! CHECK: } ! CHECK: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]]#0 : (index) -> i32 ! CHECK: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap {uniq_name = "_QFido3Ej.addr"} +! NSW: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref> +! NSW: %[[J_VAL_FINAL:.*]]:2 = fir.iterate_while (%[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}}) and (%[[OK:.*]] = {{.*}}) -> (index, i1) { +! NSW: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32 +! NSW: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap +! NSW: %[[RES:.*]] = fir.if %[[OK]] -> (i1) { +! NSW: } +! NSW: %[[J_VAL_INC:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow : index +! NSW: %[[J_VAL_NEXT:.*]] = arith.select %[[RES]], %[[J_VAL_INC]], %[[J_VAL]] : index +! NSW: fir.result %[[J_VAL_NEXT]], %[[RES]] : index, i1 +! NSW: } +! NSW: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]]#0 : (index) -> i32 +! NSW: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap ! CHECK: %[[VAL_21:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_21]] overflow : i32 +! 
CHECK: %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_21]] : i32 ! CHECK: fir.store %[[VAL_22]] to %[[VAL_3]] : !fir.ptr ! CHECK: br ^bb1 ! CHECK: ^bb5: diff --git a/flang/test/Lower/mixed_loops.f90 b/flang/test/Lower/mixed_loops.f90 index 991fd7aa82bb955..1aa0225129bed62 100644 --- a/flang/test/Lower/mixed_loops.f90 +++ b/flang/test/Lower/mixed_loops.f90 @@ -53,7 +53,7 @@ subroutine while_inside_do_loop ! CHECK: fir.store %[[TDEC]] to %[[T_REF]] ! CHECK: %[[I3:.*]] = fir.load %[[I_REF]] : !fir.ref ! CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 - ! CHECK: %[[IINC:.*]] = arith.addi %[[I3]], %[[C1_2]] overflow : i32 + ! CHECK: %[[IINC:.*]] = arith.addi %[[I3]], %[[C1_2]] : i32 ! CHECK: fir.store %[[IINC]] to %[[I_REF]] : !fir.ref ! CHECK: br ^[[HDR1]] end do @@ -100,10 +100,10 @@ subroutine do_inside_while_loop ! CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32 ! CHECK: %[[JINC:.*]] = arith.muli %[[C2]], %[[J2]] : i32 ! CHECK: fir.store %[[JINC]] to %[[J_REF]] : !fir.ref - ! CHECK: %[[IINC:.*]] = arith.addi %[[IDX]], %[[C1]] overflow : index + ! CHECK: %[[IINC:.*]] = arith.addi %[[IDX]], %[[C1]] : index ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[C1]] : (index) -> i32 ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref - ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow : i32 + ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32 ! CHECK: fir.result %[[IINC]], %[[I_IVINC]] : index, i32 do i=8,13 j=j*2 diff --git a/flang/test/Lower/vector-subscript-io.f90 b/flang/test/Lower/vector-subscript-io.f90 index 372130fd099074f..129e3ee1206c09e 100644 --- a/flang/test/Lower/vector-subscript-io.f90 +++ b/flang/test/Lower/vector-subscript-io.f90 @@ -30,7 +30,7 @@ subroutine simple(x, y) ! CHECK: %[[VAL_19:.*]] = fir.array_coor %[[VAL_20]](%[[VAL_10]]) {{\[}}%[[VAL_11]]] %[[VAL_18]] : (!fir.ref>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_19]] : (!fir.ref) -> !fir.ref ! 
CHECK: %[[VAL_22:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_9]], %[[VAL_21]], %[[VAL_3]]) {{.*}}: (!fir.ref, !fir.ref, i32) -> i1 -! CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_12]], %[[VAL_6]] overflow : index +! CHECK: %[[VAL_23:.*]] = arith.addi %[[VAL_12]], %[[VAL_6]] : index ! CHECK: %[[VAL_24:.*]] = arith.subi %[[VAL_13]], %[[VAL_6]] : index ! CHECK: cf.br ^bb1(%[[VAL_23]], %[[VAL_24]] : index, index) ! CHECK: ^bb3: @@ -79,7 +79,7 @@ integer function get_substcript() ! CHECK: %[[VAL_49:.*]] = fir.convert %[[VAL_48]] : (i32) -> index ! CHECK: %[[VAL_50:.*]] = fir.array_coor %[[VAL_51]] {{\[}}%[[VAL_42]]] %[[VAL_46]], %[[VAL_49]] : (!fir.box>, !fir.slice<2>, index, index) -> !fir.ref ! CHECK: %[[VAL_52:.*]] = fir.call @_FortranAioInputReal32(%[[VAL_34]], %[[VAL_50]]) {{.*}}: (!fir.ref, !fir.ref) -> i1 -! CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_43]], %[[VAL_30]] overflow : index +! CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_43]], %[[VAL_30]] : index ! CHECK: %[[VAL_54:.*]] = arith.subi %[[VAL_44]], %[[VAL_30]] : index ! CHECK: cf.br ^bb1(%[[VAL_53]], %[[VAL_54]] : index, index) ! CHECK: ^bb3: @@ -122,7 +122,7 @@ subroutine with_assumed_shapes(x, y) ! CHECK: %[[VAL_77:.*]] = fir.array_coor %[[VAL_78]] {{\[}}%[[VAL_70]]] %[[VAL_76]] : (!fir.box>, !fir.slice<1>, index) -> !fir.ref ! CHECK: %[[VAL_79:.*]] = fir.convert %[[VAL_77]] : (!fir.ref) -> !fir.ref ! CHECK: %[[VAL_80:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_67]], %[[VAL_79]], %[[VAL_62]]) {{.*}}: (!fir.ref, !fir.ref, i32) -> i1 -! CHECK: %[[VAL_81:.*]] = arith.addi %[[VAL_71]], %[[VAL_64]] overflow : index +! CHECK: %[[VAL_81:.*]] = arith.addi %[[VAL_71]], %[[VAL_64]] : index ! CHECK: %[[VAL_82:.*]] = arith.subi %[[VAL_72]], %[[VAL_64]] : index ! CHECK: cf.br ^bb1(%[[VAL_81]], %[[VAL_82]] : index, index) ! CHECK: ^bb3: @@ -162,7 +162,7 @@ subroutine lower_bounds(x, y) ! 
CHECK: %[[VAL_107:.*]] = fir.array_coor %[[VAL_108]](%[[VAL_97]]) {{\[}}%[[VAL_99]]] %[[VAL_91]], %[[VAL_106]] : (!fir.ref>, !fir.shapeshift<2>, !fir.slice<2>, index, index) -> !fir.ref ! CHECK: %[[VAL_109:.*]] = fir.convert %[[VAL_107]] : (!fir.ref) -> !fir.ref ! CHECK: %[[VAL_110:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_96]], %[[VAL_109]], %[[VAL_90]]) {{.*}}: (!fir.ref, !fir.ref, i32) -> i1 -! CHECK: %[[VAL_111:.*]] = arith.addi %[[VAL_100]], %[[VAL_93]] overflow : index +! CHECK: %[[VAL_111:.*]] = arith.addi %[[VAL_100]], %[[VAL_93]] : index ! CHECK: %[[VAL_112:.*]] = arith.subi %[[VAL_101]], %[[VAL_93]] : index ! CHECK: cf.br ^bb1(%[[VAL_111]], %[[VAL_112]] : index, index) ! CHECK: ^bb3: @@ -202,11 +202,11 @@ subroutine two_vectors(x, y1, y2) ! CHECK: %[[VAL_138:.*]] = fir.convert %[[VAL_137]] : (i32) -> index ! CHECK: %[[VAL_139:.*]] = fir.array_coor %[[VAL_140]](%[[VAL_123]]) {{\[}}%[[VAL_124]]] %[[VAL_134]], %[[VAL_138]] : (!fir.ref>, !fir.shape<2>, !fir.slice<2>, index, index) -> !fir.ref ! CHECK: %[[VAL_141:.*]] = fir.call @_FortranAioInputReal32(%[[VAL_122]], %[[VAL_139]]) {{.*}}: (!fir.ref, !fir.ref) -> i1 -! CHECK: %[[VAL_142:.*]] = arith.addi %[[VAL_128]], %[[VAL_119]] overflow : index +! CHECK: %[[VAL_142:.*]] = arith.addi %[[VAL_128]], %[[VAL_119]] : index ! CHECK: %[[VAL_143:.*]] = arith.subi %[[VAL_129]], %[[VAL_119]] : index ! CHECK: cf.br ^bb2(%[[VAL_142]], %[[VAL_143]] : index, index) ! CHECK: ^bb4: -! CHECK: %[[VAL_144:.*]] = arith.addi %[[VAL_125]], %[[VAL_119]] overflow : index +! CHECK: %[[VAL_144:.*]] = arith.addi %[[VAL_125]], %[[VAL_119]] : index ! CHECK: %[[VAL_145:.*]] = arith.subi %[[VAL_126]], %[[VAL_119]] : index ! CHECK: cf.br ^bb1(%[[VAL_144]], %[[VAL_145]] : index, index) ! CHECK: ^bb5: @@ -245,11 +245,11 @@ subroutine triplets_and_vector(x, y) ! 
CHECK: %[[VAL_169:.*]] = fir.array_coor %[[VAL_170]](%[[VAL_157]]) {{\[}}%[[VAL_158]]] %[[VAL_162]], %[[VAL_168]] : (!fir.ref>>, !fir.shape<2>, !fir.slice<2>, index, index) -> !fir.ref> ! CHECK: %[[VAL_171:.*]] = fir.convert %[[VAL_169]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_172:.*]] = fir.call @_FortranAioInputComplex32(%[[VAL_156]], %[[VAL_171]]) {{.*}}: (!fir.ref, !fir.ref) -> i1 -! CHECK: %[[VAL_173:.*]] = arith.addi %[[VAL_162]], %[[VAL_153]] overflow : index +! CHECK: %[[VAL_173:.*]] = arith.addi %[[VAL_162]], %[[VAL_153]] : index ! CHECK: %[[VAL_174:.*]] = arith.subi %[[VAL_163]], %[[VAL_153]] : index ! CHECK: cf.br ^bb2(%[[VAL_173]], %[[VAL_174]] : index, index) ! CHECK: ^bb4: -! CHECK: %[[VAL_175:.*]] = arith.addi %[[VAL_159]], %[[VAL_153]] overflow : index +! CHECK: %[[VAL_175:.*]] = arith.addi %[[VAL_159]], %[[VAL_153]] : index ! CHECK: %[[VAL_176:.*]] = arith.subi %[[VAL_160]], %[[VAL_153]] : index ! CHECK: cf.br ^bb1(%[[VAL_175]], %[[VAL_176]] : index, index) ! CHECK: ^bb5: @@ -287,7 +287,7 @@ subroutine simple_char(x, y) ! CHECK: %[[VAL_200:.*]] = fir.convert %[[VAL_199]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_201:.*]] = fir.convert %[[VAL_184]]#1 : (index) -> i64 ! CHECK: %[[VAL_202:.*]] = fir.call @_FortranAioInputAscii(%[[VAL_189]], %[[VAL_200]], %[[VAL_201]]) {{.*}}: (!fir.ref, !fir.ref, i64) -> i1 -! CHECK: %[[VAL_203:.*]] = arith.addi %[[VAL_192]], %[[VAL_183]] overflow : index +! CHECK: %[[VAL_203:.*]] = arith.addi %[[VAL_192]], %[[VAL_183]] : index ! CHECK: %[[VAL_204:.*]] = arith.subi %[[VAL_193]], %[[VAL_183]] : index ! CHECK: cf.br ^bb1(%[[VAL_203]], %[[VAL_204]] : index, index) ! CHECK: ^bb3: @@ -333,7 +333,7 @@ subroutine substring(x, y, i, j) ! CHECK: %[[VAL_238:.*]] = fir.convert %[[VAL_233]] : (!fir.ref>) -> !fir.ref ! CHECK: %[[VAL_239:.*]] = fir.convert %[[VAL_237]] : (index) -> i64 ! CHECK: %[[VAL_240:.*]] = fir.call @_FortranAioInputAscii(%[[VAL_213]], %[[VAL_238]], %[[VAL_239]]) {{.*}}: (!fir.ref, !fir.ref, i64) -> i1 -! 
CHECK: %[[VAL_241:.*]] = arith.addi %[[VAL_221]], %[[VAL_210]] overflow : index +! CHECK: %[[VAL_241:.*]] = arith.addi %[[VAL_221]], %[[VAL_210]] : index ! CHECK: %[[VAL_242:.*]] = arith.subi %[[VAL_222]], %[[VAL_210]] : index ! CHECK: cf.br ^bb1(%[[VAL_241]], %[[VAL_242]] : index, index) ! CHECK: ^bb3: @@ -366,7 +366,7 @@ subroutine complex_part(z, y) ! CHECK: %[[VAL_260:.*]] = fir.convert %[[VAL_259]] : (i32) -> index ! CHECK: %[[VAL_261:.*]] = fir.array_coor %[[VAL_262]] {{\[}}%[[VAL_254]]] %[[VAL_260]] : (!fir.box>>, !fir.slice<1>, index) -> !fir.ref ! CHECK: %[[VAL_263:.*]] = fir.call @_FortranAioInputReal32(%[[VAL_251]], %[[VAL_261]]) {{.*}}: (!fir.ref, !fir.ref) -> i1 -! CHECK: %[[VAL_264:.*]] = arith.addi %[[VAL_255]], %[[VAL_248]] overflow : index +! CHECK: %[[VAL_264:.*]] = arith.addi %[[VAL_255]], %[[VAL_248]] : index ! CHECK: %[[VAL_265:.*]] = arith.subi %[[VAL_256]], %[[VAL_248]] : index ! CHECK: cf.br ^bb1(%[[VAL_264]], %[[VAL_265]] : index, index) ! CHECK: ^bb3: @@ -414,7 +414,7 @@ subroutine simple_derived(x, y) ! CHECK: %[[VAL_288:.*]] = fir.embox %[[VAL_286]] : (!fir.ref}>>) -> !fir.box}>> ! CHECK: %[[VAL_289:.*]] = fir.convert %[[VAL_288]] : (!fir.box}>>) -> !fir.box ! CHECK: %[[VAL_290:.*]] = fir.call @_FortranAioInputDerivedType(%[[VAL_276]], %[[VAL_289]], {{.*}}) {{.*}}: (!fir.ref, !fir.box, !fir.ref) -> i1 -! CHECK: %[[VAL_291:.*]] = arith.addi %[[VAL_279]], %[[VAL_273]] overflow : index +! CHECK: %[[VAL_291:.*]] = arith.addi %[[VAL_279]], %[[VAL_273]] : index ! CHECK: %[[VAL_292:.*]] = arith.subi %[[VAL_280]], %[[VAL_273]] : index ! CHECK: cf.br ^bb1(%[[VAL_291]], %[[VAL_292]] : index, index) ! CHECK: ^bb3: @@ -463,11 +463,11 @@ subroutine with_path(b, i) ! CHECK: %[[VAL_325:.*]] = fir.array_coor %[[VAL_326:.*]](%[[VAL_313]]) {{\[}}%[[VAL_315]]] %[[VAL_301]], %[[VAL_324]], %[[VAL_316]] : (!fir.box}>>}>>>, !fir.shift<3>, !fir.slice<3>, index, index, index) -> !fir.ref ! 
CHECK: %[[VAL_327:.*]] = fir.convert %[[VAL_325]] : (!fir.ref) -> !fir.ref ! CHECK: %[[VAL_328:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_308]], %[[VAL_327]], %[[VAL_302]]) {{.*}}: (!fir.ref, !fir.ref, i32) -> i1 -! CHECK: %[[VAL_329:.*]] = arith.addi %[[VAL_319]], %[[VAL_305]] overflow : index +! CHECK: %[[VAL_329:.*]] = arith.addi %[[VAL_319]], %[[VAL_305]] : index ! CHECK: %[[VAL_330:.*]] = arith.subi %[[VAL_320]], %[[VAL_305]] : index ! CHECK: cf.br ^bb2(%[[VAL_329]], %[[VAL_330]] : index, index) ! CHECK: ^bb4: -! CHECK: %[[VAL_331:.*]] = arith.addi %[[VAL_316]], %[[VAL_305]] overflow : index +! CHECK: %[[VAL_331:.*]] = arith.addi %[[VAL_316]], %[[VAL_305]] : index ! CHECK: %[[VAL_332:.*]] = arith.subi %[[VAL_317]], %[[VAL_305]] : index ! CHECK: cf.br ^bb1(%[[VAL_331]], %[[VAL_332]] : index, index) ! CHECK: ^bb5: @@ -505,7 +505,7 @@ subroutine simple_iostat(x, y, j, stat) ! CHECK: %[[VAL_355:.*]] = fir.convert %[[VAL_354]] : (i32) -> index ! CHECK: %[[VAL_356:.*]] = fir.array_coor %[[VAL_357]] {{\[}}%[[VAL_347]]] %[[VAL_355]] : (!fir.box>, !fir.slice<1>, index) -> !fir.ref ! CHECK: %[[VAL_358:.*]] = fir.call @_FortranAioInputReal32(%[[VAL_343]], %[[VAL_356]]) {{.*}}: (!fir.ref, !fir.ref) -> i1 -! CHECK: %[[VAL_359:.*]] = arith.addi %[[VAL_349]], %[[VAL_338]] overflow : index +! CHECK: %[[VAL_359:.*]] = arith.addi %[[VAL_349]], %[[VAL_338]] : index ! CHECK: cf.br ^bb1(%[[VAL_359]], %[[VAL_358]] : index, i1) ! CHECK: ^bb3: ! CHECK: cf.cond_br %[[VAL_350]], ^bb4, ^bb5 @@ -568,10 +568,10 @@ subroutine iostat_in_io_loop(k, j, stat) ! CHECK: %[[VAL_399:.*]] = fir.array_coor %[[VAL_400]](%[[VAL_387]]) {{\[}}%[[VAL_389]]] %[[VAL_394]], %[[VAL_398]] : (!fir.ref>, !fir.shape<2>, !fir.slice<2>, index, index) -> !fir.ref ! CHECK: %[[VAL_401:.*]] = fir.convert %[[VAL_399]] : (!fir.ref) -> !fir.ref ! CHECK: %[[VAL_402:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_378]], %[[VAL_401]], %[[VAL_374]]) {{.*}}: (!fir.ref, !fir.ref, i32) -> i1 -! 
CHECK: %[[VAL_403:.*]] = arith.addi %[[VAL_390]], %[[VAL_371]] overflow : index +! CHECK: %[[VAL_403:.*]] = arith.addi %[[VAL_390]], %[[VAL_371]] : index ! CHECK: cf.br ^bb4(%[[VAL_403]], %[[VAL_402]] : index, i1) ! CHECK: ^bb6(%[[VAL_404:.*]]: i1): -! CHECK: %[[VAL_405:.*]] = arith.addi %[[VAL_380]], %[[VAL_371]] overflow : index +! CHECK: %[[VAL_405:.*]] = arith.addi %[[VAL_380]], %[[VAL_371]] : index ! CHECK: cf.br ^bb1(%[[VAL_405]], %[[VAL_404]] : index, i1) ! CHECK: ^bb7: ! CHECK: %[[VAL_406:.*]] = fir.convert %[[VAL_380]] : (index) -> i32 diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index af38bf272b02b16..fe5e36f704c76cf 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -234,6 +234,12 @@ static llvm::cl::opt integerWrapAround( llvm::cl::desc("Treat signed integer overflow as two's complement"), llvm::cl::init(false)); +// TODO: integrate this option with the above +static llvm::cl::opt + setNSW("integer-overflow", + llvm::cl::desc("add nsw flag to internal operations"), + llvm::cl::init(false)); + #define FLANG_EXCLUDE_CODEGEN #include "flang/Optimizer/Passes/CommandLineOpts.h" #include "flang/Optimizer/Passes/Pipelines.h" @@ -375,6 +381,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR( loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder); loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR); loweringOptions.setIntegerWrapAround(integerWrapAround); + loweringOptions.setNSWOnLoopVarInc(setNSW); std::vector envDefaults = {}; Fortran::frontend::TargetOptions targetOpts; Fortran::frontend::CodeGenOptions cgOpts; @@ -460,7 +467,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR( // Add O2 optimizer pass pipeline. 
MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2); - config.NSWOnLoopVarInc = !integerWrapAround; + config.NSWOnLoopVarInc = setNSW; fir::registerDefaultInlinerPass(config); fir::createDefaultFIROptimizerPassPipeline(pm, config); } From 1f6741c1645954b1f4b2fbca470a20081f5e75af Mon Sep 17 00:00:00 2001 From: Jan Voung Date: Mon, 28 Oct 2024 10:28:02 -0400 Subject: [PATCH 175/425] [clang][dataflow] Don't clear cached field state if field is const (#113698) ... in the unchecked optional access model. --- .../Models/UncheckedOptionalAccessModel.cpp | 8 +++++-- .../UncheckedOptionalAccessModelTest.cpp | 23 ++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index b0bd8274405d02e..31ae2b94f5b6174 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -601,10 +601,14 @@ void handleNonConstMemberCall(const CallExpr *CE, dataflow::RecordStorageLocation *RecordLoc, const MatchFinder::MatchResult &Result, LatticeTransferState &State) { - // When a non-const member function is called, reset some state. if (RecordLoc != nullptr) { + // When a non-const member function is called, clear all (non-const) + // optional fields of the receiver. Const-qualified fields can't be + // changed (at least, not without UB). 
for (const auto &[Field, FieldLoc] : RecordLoc->children()) { - if (isSupportedOptionalType(Field->getType())) { + QualType FieldType = Field->getType(); + if (!FieldType.isConstQualified() && + isSupportedOptionalType(Field->getType())) { auto *FieldRecordLoc = cast_or_null(FieldLoc); if (FieldRecordLoc) { setHasValue(*FieldRecordLoc, State.Env.makeAtomicBoolValue(), diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp index 22fe347c425593f..5b64eaca0e10d3a 100644 --- a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp @@ -2167,7 +2167,7 @@ TEST_P(UncheckedOptionalAccessTest, OptionalReturnedFromFuntionCall) { )"); } -TEST_P(UncheckedOptionalAccessTest, OptionalFieldModified) { +TEST_P(UncheckedOptionalAccessTest, NonConstMethodMayClearOptionalField) { ExpectDiagnosticsFor( R"( #include "unchecked_optional_access_test.h" @@ -2187,6 +2187,27 @@ TEST_P(UncheckedOptionalAccessTest, OptionalFieldModified) { )"); } +TEST_P(UncheckedOptionalAccessTest, + NonConstMethodMayNotClearConstOptionalField) { + ExpectDiagnosticsFor( + R"( + #include "unchecked_optional_access_test.h" + + struct Foo { + const $ns::$optional opt; + void clear(); + }; + + void target(Foo& foo) { + if (foo.opt) { + foo.opt.value(); + foo.clear(); + foo.opt.value(); + } + } + )"); +} + TEST_P(UncheckedOptionalAccessTest, StdSwap) { ExpectDiagnosticsFor( R"( From d83485a080341688cbb9d5c07a3839bd97f9001e Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Mon, 28 Oct 2024 18:48:23 +0400 Subject: [PATCH 176/425] [clang][NFC] Add test for CWG issues about linkage in cross-TU context (#113736) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [CWG279](https://cplusplus.github.io/CWG/issues/279.html) Correspondence of "names for linkage 
purposes" ==================== [P1787R6](https://wg21.link/p1787r6): > CWG1884, CWG279, and CWG338 are resolved by defining entity identity explicitly. In both cases discussed in the issue, declaration matching succeeds per [[basic.link]/8](https://eel.is/c++draft/basic.link#8), thanks to the > considering declarations of unnamed types to introduce their names for linkage purposes, if any ([dcl.typedef], [dcl.enum]) and > they both declare names with external linkage parts. Which means that both pairs of declarations of `S` and `S2` declare the same entity. [[basic.link]/11](https://eel.is/c++draft/basic.link#11) is also satisfied, because in all cases a type is declared. Then the cases diverge. `S` is a pair of a forward declaration and a definition, which means it's not subject to [[basic.def.odr]/15](https://eel.is/c++draft/basic.def.odr#15), hence it's well-formed. Whereas `S2` is a pair of two definitions, so they are subject to [[basic.def.odr]/15](https://eel.is/c++draft/basic.def.odr#15), which they do not satisfy due to the violation of the following requirement: > — Each such definition shall consist of the same sequence of tokens, <...> Because per [[class.pre]/2](https://eel.is/c++draft/class#pre-2), a class definition is a class-specifier, which includes the class-head, but the class-head differs between the definitions. [CWG338](https://cplusplus.github.io/CWG/issues/338.html) Enumerator name with linkage used as class name in other translation unit ==================== [P1787R6](https://wg21.link/p1787r6): > CWG1884, CWG279, and CWG338 are resolved by defining entity identity explicitly. I believe this CWG issue points to the same underlying issue with old [basic.link]/9 as CWG1884 (see below), so I'm calling it a duplicate of CWG1884. The cases described there are part of the extensive CWG1884 test. Also worth noting that enumerators don't have linkage these days. 
[CWG1884](https://cplusplus.github.io/CWG/issues/1884.html) Unclear requirements for same-named external-linkage entities ==================== [P1787R6](https://wg21.link/p1787r6): > CWG1884, CWG279, and CWG338 are resolved by defining entity identity explicitly. [basic.link]/9, quoted in the issue, is now split into several pieces. > Two names that are the same (6.1 [basic.pre]) I believe this is now expressed as corresponding declarations, defined in [[basic.scope.scope]/4](https://eel.is/c++draft/basic.scope#scope-4). > and that are declared in different scopes shall denote the same variable, function, type, enumerator, template or namespace if This is covered by [[basic.link]/11](https://eel.is/c++draft/basic.link#11) after it's determined by [[basic.link]/8](https://eel.is/c++draft/basic.link#8) that two declarations declare the same entity. > — both names have external linkage or else both names have internal linkage and are declared in the same translation unit; and > — both names refer to members of the same namespace or to members, not by inheritance, of the same class; and Most of this is covered by [[basic.link]/8](https://eel.is/c++draft/basic.link#8). > — when both names denote functions, the parameter-type-lists of the functions (9.3.4.6 [dcl.fct]) are identical; and > — when both names denote function templates, the signatures (13.7.7.2 [temp.over.link]) are the same. This is now expressed as corresponding overloads, defined in [[basic.scope.scope]/4](https://eel.is/c++draft/basic.scope#scope-4). > Among other things, it should be clarified that "declared in" refers to the namespace of which the name is a member, not the lexical scope in which the declaration appears (which affects friend declarations, block-scope extern declarations, and elaborated-type-specifiers). This is addressed by the "have the same target scope" part of [[basic.link]/8](https://eel.is/c++draft/basic.link#8). 
The tests basically test [[basic.link]/11](https://eel.is/c++draft/basic.link#11) from the following standpoint: > The intent is that this rule prevents declaring a name with external linkage to be, for instance, a type in one translation unit and a namespace in a different translation unit. See the comment at the beginning of the test for details on the testing approach. Reviewers are advised to check compiler output for any surprises, but if they are in a hurry, they can search for `FIXME`, `OK`, and `#cwg1884` to see most of the irregularities in Clang behavior across the test (not all of them are incorrect behavior, though). --- clang/test/CXX/drs/cwg1884.cpp | 643 +++++++++++++++++++++++++++++++++ clang/test/CXX/drs/cwg18xx.cpp | 2 + clang/test/CXX/drs/cwg279.cpp | 53 +++ clang/test/CXX/drs/cwg2xx.cpp | 2 + clang/test/CXX/drs/cwg3xx.cpp | 2 + clang/www/cxx_dr_status.html | 6 +- 6 files changed, 705 insertions(+), 3 deletions(-) create mode 100644 clang/test/CXX/drs/cwg1884.cpp create mode 100644 clang/test/CXX/drs/cwg279.cpp diff --git a/clang/test/CXX/drs/cwg1884.cpp b/clang/test/CXX/drs/cwg1884.cpp new file mode 100644 index 000000000000000..c4f76baa3933fbe --- /dev/null +++ b/clang/test/CXX/drs/cwg1884.cpp @@ -0,0 +1,643 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file --leading-lines %s %t +// RUN: %clang_cc1 -std=c++20 -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg1884_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg1884_A.pcm +// RUN: %clang_cc1 -std=c++20 -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg1884.cpp -fmodule-file=cwg1884_A=%t/cwg1884_A.pcm +// RUN: %clang_cc1 -std=c++23 -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg1884_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg1884_A.pcm +// RUN: %clang_cc1 -std=c++23 -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg1884.cpp 
-fmodule-file=cwg1884_A=%t/cwg1884_A.pcm +// RUN: %clang_cc1 -std=c++2c -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg1884_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg1884_A.pcm +// RUN: %clang_cc1 -std=c++2c -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg1884.cpp -fmodule-file=cwg1884_A=%t/cwg1884_A.pcm + +// cwg1884: partial +// Cases b11, e11, g3, g4 are problematic, but we handle the other 101 cases fine. + +// _N4993_.[basic.link]/11: +// For any two declarations of an entity E: +// — If one declares E to be a variable or function, +// the other shall declare E as one of the same type. +// — If one declares E to be an enumerator, the other shall do so. +// — If one declares E to be a namespace, the other shall do so. +// — If one declares E to be a type, +// the other shall declare E to be a type of the same kind (9.2.9.5). +// — If one declares E to be a class template, +// the other shall do so with the same kind and an equivalent template-head (13.7.7.2). +// [Note 5 : The declarations can supply different default template arguments. — end note] +// — If one declares E to be a function template or a (partial specialization of a) variable template, +// the other shall declare E to be one with an equivalent template-head and type. +// — If one declares E to be an alias template, +// the other shall declare E to be one with an equivalent template-head and defining-type-id. +// — If one declares E to be a concept, the other shall do so. +// Types are compared after all adjustments of types (during which typedefs (9.2.4) are replaced by their definitions); +// declarations for an array object can specify array types that differ by the presence or absence of a major array bound (9.3.4.5). +// No diagnostic is required if neither declaration is reachable from the other. + +// The structure of the test is the following. 
First, module cwg1884_A +// provides all (significant) kinds of entities, each named 'a' through 'h', and copies of them. +// Then the .cpp file does MxN kind of testing, where it tests one kind of entity against every other kind. + +//--- cwg1884_A.cppm +export module cwg1884_A; + +export { +int a1; +int a2; +int a3; +int a4; +int a5; +int a6; +int a7; +int a8; +int a9; +int a10; +int a11; +void b1(); +void b2(); +void b3(); +void b4(); +void b5(); +void b6(); +void b7(); +void b8(); +void b9(); +void b10(); +void b11(); +enum E { + c1, + c2, + c3, + c4, + c5, + c6, + c7, + c8, + c9, + c10 +}; +namespace d1 {} +namespace d2 {} +namespace d3 {} +namespace d4 {} +namespace d5 {} +namespace d6 {} +namespace d7 {} +namespace d8 {} +namespace d9 {} +namespace d10 {} +struct e1; +struct e2; +struct e3; +struct e4; +struct e5; +struct e6; +struct e7; +struct e8; +struct e9; +struct e10; +struct e11; +struct e12; +struct e13; +template +class f1; +template +class f2; +template +class f3; +template +class f4; +template +class f5; +template +class f6; +template +class f7; +template +class f8; +template +class f9; +template +class f10; +template +class f11; +template +void g1(int); +template +void g2(int); +template +void g3(int); +template +void g4(int); +template +void g5(int); +template +void g6(int); +template +void g7(int); +template +void g8(int); +template +void g9(int); +template +void g10(int); +template +int h1; +template +int h2; +template +int h3; +template +int h4; +template +int h5; +template +int h6; +template +int h7; +template +int h8; +template +int h9; +template +int h10; +template +using i1 = int; +template +using i2 = int; +template +using i3 = int; +template +using i4 = int; +template +using i5 = int; +template +using i6 = int; +template +using i7 = int; +template +using i8 = int; +template +using i9 = int; +template +using i10 = int; +template +using i11 = int; +template +concept j1 = true; +template +concept j2 = true; +template +concept j3 = true; 
+template +concept j4 = true; +template +concept j5 = true; +template +concept j6 = true; +template +concept j7 = true; +template +concept j8 = true; +template +concept j9 = true; +template +concept j10 = true; +template +concept j11 = true; +} // export + + +//--- cwg1884.cpp +import cwg1884_A; + +// FIXME: we don't diagnose several cases we should be. They are marked with MISSING prefix. + +// Part A: matching against `int a;` +// --------------------------------- + +void a1(); +// since-cxx20-error@-1 {{redefinition of 'a1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:42 {{previous definition is here}} +enum Ea { + a2 + // since-cxx20-error@-1 {{redefinition of 'a2'}} + // since-cxx20-note@cwg1884_A.cppm:43 {{previous definition is here}} +}; +namespace a3 {} +// since-cxx20-error@-1 {{redefinition of 'a3' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:44 {{previous definition is here}} +struct a4; +// @-1 OK, types and variables do not correspond +template +class a5; +// since-cxx20-error@-1 {{redefinition of 'a5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:46 {{previous definition is here}} +template +void a6(int); +// since-cxx20-error@-1 {{redefinition of 'a6' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:47 {{previous definition is here}} +template +int a7; +// since-cxx20-error@-1 {{redefinition of 'a7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:48 {{previous definition is here}} +template +int a8; +// since-cxx20-error@-1 {{redefinition of 'a8' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:49 {{previous definition is here}} +// since-cxx20-error@-3 {{expected ';' after top level declarator}} +template +using a9 = int; +// since-cxx20-error@-1 {{redefinition of 'a9' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:50 {{previous definition is here}} +template +concept a10 = true; +// since-cxx20-error@-1 
{{redefinition of 'a10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:51 {{previous definition is here}} +// For variables, type has to match as well. +long a11; +// since-cxx20-error@-1 {{redefinition of 'a11' with a different type: 'long' vs 'int'}} +// since-cxx20-note@cwg1884_A.cppm:52 {{previous definition is here}} + + +// Part B: matching against `void b();` +// ------------------------------------ + +int b1; +// since-cxx20-error@-1 {{redefinition of 'b1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:53 {{previous definition is here}} +enum Eb { + b2 + // since-cxx20-error@-1 {{redefinition of 'b2'}} + // since-cxx20-note@cwg1884_A.cppm:54 {{previous definition is here}} +}; +namespace b3 {} // #cwg1884-namespace-b +// since-cxx20-error@-1 {{redefinition of 'b3' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:55 {{previous definition is here}} +struct b4; +// @-1 OK, types and functions do not correspond +template +class b5; +// since-cxx20-error@-1 {{redefinition of 'b5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:57 {{previous definition is here}} +template +void b6(int); +// @-1 OK, a non-corresponding overload +template +int b7; +// since-cxx20-error@-1 {{redefinition of 'b7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:59 {{previous definition is here}} +template +int b8; +// since-cxx20-error@-1 {{no variable template matches partial specialization}} +template +using b9 = int; +// since-cxx20-error@-1 {{redefinition of 'b9' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:61 {{previous definition is here}} +template +concept b10 = true; +// since-cxx20-error@-1 {{redefinition of 'b10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:62 {{previous definition is here}} +// For functions, type has to match as well. +// FIXME: we should be loud and clear here about type mismatch, like we do in `a11` case. 
+int b11(); +// since-cxx20-error@-1 {{declaration of 'b11' in the global module follows declaration in module cwg1884_A}} +// since-cxx20-note@cwg1884_A.cppm:63 {{previous declaration is here}} + + +// Part C: matching against `enum E { c };` +// ---------------------------------------- + +int c1; +// since-cxx20-error@-1 {{redefinition of 'c1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:65 {{previous definition is here}} +void c2(); +// since-cxx20-error@-1 {{redefinition of 'c2' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:66 {{previous definition is here}} +namespace c3 {} +// since-cxx20-error@-1 {{redefinition of 'c3' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:67 {{previous definition is here}} +struct c4; +// @-1 OK, types and enumerators do not correspond +template +class c5; +// since-cxx20-error@-1 {{redefinition of 'c5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:69 {{previous definition is here}} +template +void c6(int); +// since-cxx20-error@-1 {{redefinition of 'c6' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:70 {{previous definition is here}} +template +int c7; +// since-cxx20-error@-1 {{redefinition of 'c7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:71 {{previous definition is here}} +template +int c8; +// since-cxx20-error@-1 {{redefinition of 'c8' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:72 {{previous definition is here}} +// since-cxx20-error@-3 {{expected ';' after top level declarator}} +template +using c9 = int; +// since-cxx20-error@-1 {{redefinition of 'c9' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:73 {{previous definition is here}} +template +concept c10 = true; +// since-cxx20-error@-1 {{redefinition of 'c10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:74 {{previous definition is here}} + + +// Part D: matching against `namespace d {};` 
+// ------------------------------------------ + +int d1; +// since-cxx20-error@-1 {{redefinition of 'd1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:76 {{previous definition is here}} +void d2(); +// since-cxx20-error@-1 {{redefinition of 'd2' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:77 {{previous definition is here}} +enum Ed { + d3 + // since-cxx20-error@-1 {{redefinition of 'd3'}} + // since-cxx20-note@cwg1884_A.cppm:78 {{previous definition is here}} +}; +struct d4; +// since-cxx20-error@-1 {{redefinition of 'd4' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:79 {{previous definition is here}} +template +class d5; +// since-cxx20-error@-1 {{redefinition of 'd5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:80 {{previous definition is here}} +template +void d6(int); +// since-cxx20-error@-1 {{redefinition of 'd6' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:81 {{previous definition is here}} +template +int d7; +// since-cxx20-error@-1 {{redefinition of 'd7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:82 {{previous definition is here}} +template +int d8; +// since-cxx20-error@-1 {{redefinition of 'd8' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:83 {{previous definition is here}} +// since-cxx20-error@-3 {{expected ';' after top level declarator}} +template +using d9 = int; +// since-cxx20-error@-1 {{redefinition of 'd9' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:84 {{previous definition is here}} +template +concept d10 = true; +// since-cxx20-error@-1 {{redefinition of 'd10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:85 {{previous definition is here}} + + +// Part E: matching against `struct e;` +// ------------------------------------ + +int e1; +// @-1 OK, types and variables do not correspond +void e2(); +// @-1 OK, types and functions do not correspond +enum Ee { + 
e3 + // @-1 OK, types and enumerators do not correspond +}; +namespace e4 {} +// since-cxx20-error@-1 {{redefinition of 'e4' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:89 {{previous definition is here}} +template +class e5; +// since-cxx20-error@-1 {{redefinition of 'e5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:90 {{previous definition is here}} +template +void e6(int); +// @-1 OK, types and function templates do not correspond +template +int e7; +// since-cxx20-error@-1 {{redefinition of 'e7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:92 {{previous definition is here}} +template +int e8; +// since-cxx20-error@-1 {{redefinition of 'e8' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:93 {{previous definition is here}} +// since-cxx20-error@-3 {{expected ';' after top level declarator}} +template +using e9 = int; +// since-cxx20-error@-1 {{redefinition of 'e9' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:94 {{previous definition is here}} +template +concept e10 = true; +// since-cxx20-error@-1 {{redefinition of 'e10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:95 {{previous definition is here}} +// FIXME: the following forward declaration is well-formed. +// Agreement on 'struct' vs 'class' is not required per [dcl.type.elab]/7. 
+class e11; +// since-cxx20-error@-1 {{declaration of 'e11' in the global module follows declaration in module cwg1884_A}} +// since-cxx20-note@cwg1884_A.cppm:96 {{previous declaration is here}} +union e12; +// since-cxx20-error@-1 {{use of 'e12' with tag type that does not match previous declaration}} +// since-cxx20-note@cwg1884_A.cppm:97 {{previous use is here}} +// since-cxx20-error@-3 {{declaration of 'e12' in the global module follows declaration in module cwg1884_A}} +// since-cxx20-note@cwg1884_A.cppm:97 {{previous declaration is here}} +enum e13 {}; +// since-cxx20-error@-1 {{use of 'e13' with tag type that does not match previous declaration}} +// since-cxx20-note@cwg1884_A.cppm:98 {{previous use is here}} + + +// Part F: matching against `template class f;` +// ------------------------------------------------------- + +int f1; +// since-cxx20-error@-1 {{redefinition of 'f1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:100 {{previous definition is here}} +void f2(); +// since-cxx20-error@-1 {{redefinition of 'f2' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:102 {{previous definition is here}} +enum Ef { + f3 + // since-cxx20-error@-1 {{redefinition of 'f3'}} + // since-cxx20-note@cwg1884_A.cppm:104 {{previous definition is here}} +}; +namespace f4 {} +// since-cxx20-error@-1 {{redefinition of 'f4' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:106 {{previous definition is here}} +struct f5; +// since-cxx20-error@-1 {{redefinition of 'f5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:108 {{previous definition is here}} +template +void f6(int); +// since-cxx20-error@-1 {{redefinition of 'f6' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:110 {{previous definition is here}} +template +int f7; +// since-cxx20-error@-1 {{redefinition of 'f7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:112 {{previous definition is here}} +template +int f8; 
+// since-cxx20-error@-1 {{no variable template matches partial specialization}} +template +using f9 = int; +// since-cxx20-error@-1 {{redefinition of 'f9' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:116 {{previous definition is here}} +template +concept f10 = true; +// since-cxx20-error@-1 {{redefinition of 'f10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:118 {{previous definition is here}} + + +// Part G: matching against `template void g(int);` +// ----------------------------------------------------------- + +int g1; +// since-cxx20-error@-1 {{redefinition of 'g1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:122 {{previous definition is here}} +void g2(); +// @-1 OK, a non-corresponding overload +enum Eg { + g3 + // MISSING-since-cxx20-error@-1 {{redefinition of 'g3'}} + // MISSING-since-cxx20-note@cwg1884_A.cppm:126 {{previous definition is here}} +}; +namespace g4 {} +// MISSING-since-cxx20-error@-1 {{redefinition of 'g4' as different kind of symbol}} +// MISSING-since-cxx20-note@cwg1884_A.cppm:128 {{previous definition is here}} +struct g5; +// @-1 OK, types and function templates do not correspond +template +class g6; +// since-cxx20-error@-1 {{redefinition of 'g6' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:132 {{previous definition is here}} +template +int g7; +// since-cxx20-error@-1 {{redefinition of 'g7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:134 {{previous definition is here}} +template +int g8; +// since-cxx20-error@-1 {{no variable template matches specialization; did you mean to use 'g8' as function template instead?}} +template +using g9 = int; +// since-cxx20-error@-1 {{redefinition of 'g9' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:138 {{previous definition is here}} +template +concept g10 = true; +// since-cxx20-error@-1 {{redefinition of 'g10' as different kind of symbol}} +// 
since-cxx20-note@cwg1884_A.cppm:140 {{previous definition is here}} + + +// Part H: matching against `template int h;` +// --------------------------------------------------------------- + +int h1; +// since-cxx20-error@-1 {{redefinition of 'h1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:142 {{previous definition is here}} +void h2(); +// since-cxx20-error@-1 {{redefinition of 'h2' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:144 {{previous definition is here}} +enum Eh { + h3 + // since-cxx20-error@-1 {{redefinition of 'h3'}} + // since-cxx20-note@cwg1884_A.cppm:146 {{previous definition is here}} +}; +namespace h4 {} +// since-cxx20-error@-1 {{redefinition of 'h4' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:148 {{previous definition is here}} +struct h5; +// since-cxx20-error@-1 {{redefinition of 'h5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:150 {{previous definition is here}} +template +class h6; +// since-cxx20-error@-1 {{redefinition of 'h6' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:152 {{previous definition is here}} +template +void h7(int); +// since-cxx20-error@-1 {{redefinition of 'h7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:154 {{previous definition is here}} +template +int h8; +// @-1 OK, partial specialization +template +using h9 = int; +// since-cxx20-error@-1 {{redefinition of 'h9' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:158 {{previous definition is here}} +template +concept h10 = true; +// since-cxx20-error@-1 {{redefinition of 'h10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:160 {{previous definition is here}} + + +// Part I: matching against `template using i = int;` +// ------------------------------------------------------------- + +int i1; +// since-cxx20-error@-1 {{redefinition of 'i1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:162 
{{previous definition is here}} +void i2(); +// since-cxx20-error@-1 {{redefinition of 'i2' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:164 {{previous definition is here}} +enum Ei { + i3 + // since-cxx20-error@-1 {{redefinition of 'i3'}} + // since-cxx20-note@cwg1884_A.cppm:166 {{previous definition is here}} +}; +namespace i4 {} +// since-cxx20-error@-1 {{redefinition of 'i4' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:168 {{previous definition is here}} +struct i5; +// since-cxx20-error@-1 {{redefinition of 'i5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:170 {{previous definition is here}} +template +class i6; +// since-cxx20-error@-1 {{redefinition of 'i6' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:172 {{previous definition is here}} +template +void i7(int); +// since-cxx20-error@-1 {{redefinition of 'i7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:174 {{previous definition is here}} +template +int i8; +// since-cxx20-error@-1 {{redefinition of 'i8' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:176 {{previous definition is here}} +template +int i9; +// since-cxx20-error@-1 {{no variable template matches partial specialization}} +template +concept i10 = true; +// since-cxx20-error@-1 {{redefinition of 'i10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:180 {{previous definition is here}} + + +// Part J: matching against `template concept j = true;` +// ---------------------------------------------------------------- + +int j1; +// since-cxx20-error@-1 {{redefinition of 'j1' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:184 {{previous definition is here}} +void j2(); +// since-cxx20-error@-1 {{redefinition of 'j2' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:186 {{previous definition is here}} +enum Ej { + j3 + // since-cxx20-error@-1 {{redefinition of 'j3'}} + // 
since-cxx20-note@cwg1884_A.cppm:188 {{previous definition is here}} +}; +namespace j4 {} +// since-cxx20-error@-1 {{redefinition of 'j4' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:190 {{previous definition is here}} +struct j5; +// since-cxx20-error@-1 {{redefinition of 'j5' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:192 {{previous definition is here}} +template +class j6; +// since-cxx20-error@-1 {{redefinition of 'j6' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:194 {{previous definition is here}} +template +void j7(int); +// since-cxx20-error@-1 {{redefinition of 'j7' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:196 {{previous definition is here}} +template +int j8; +// since-cxx20-error@-1 {{redefinition of 'j8' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:198 {{previous definition is here}} +template +int j9; +// since-cxx20-error@-1 {{no variable template matches partial specialization}} +template +using j10 = int; +// since-cxx20-error@-1 {{redefinition of 'j10' as different kind of symbol}} +// since-cxx20-note@cwg1884_A.cppm:202 {{previous definition is here}} diff --git a/clang/test/CXX/drs/cwg18xx.cpp b/clang/test/CXX/drs/cwg18xx.cpp index b059492637bd5cf..0fd2cd6b2d870c0 100644 --- a/clang/test/CXX/drs/cwg18xx.cpp +++ b/clang/test/CXX/drs/cwg18xx.cpp @@ -547,6 +547,8 @@ namespace cwg1881 { // cwg1881: 7 static_assert(!__is_standard_layout(D), ""); } +// cwg1884 is in cwg1884.cpp + namespace cwg1890 { // cwg1890: no drafting 2018-06-04 // FIXME: current consensus for CWG2335 is that the examples are well-formed. 
namespace ex1 { diff --git a/clang/test/CXX/drs/cwg279.cpp b/clang/test/CXX/drs/cwg279.cpp new file mode 100644 index 000000000000000..3c63486cc0dd5ef --- /dev/null +++ b/clang/test/CXX/drs/cwg279.cpp @@ -0,0 +1,53 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: split-file --leading-lines %s %t +// RUN: %clang_cc1 -std=c++20 -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg279_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg279_A.pcm +// RUN: %clang_cc1 -std=c++20 -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg279.cpp -fmodule-file=cwg279_A=%t/cwg279_A.pcm +// RUN: %clang_cc1 -std=c++23 -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg279_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg279_A.pcm +// RUN: %clang_cc1 -std=c++23 -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg279.cpp -fmodule-file=cwg279_A=%t/cwg279_A.pcm +// RUN: %clang_cc1 -std=c++2c -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg279_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg279_A.pcm +// RUN: %clang_cc1 -std=c++2c -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg279.cpp -fmodule-file=cwg279_A=%t/cwg279_A.pcm + +// cwg279: no + +//--- cwg279_A.cppm +export module cwg279_A; + +export { +struct S; // #cwg279-S +extern S *q; // #cwg279-q + +struct S2 {}; // #cwg279-S2 +extern S2 *q2; // #cwg279-q2 + +struct S3 {}; // #cwg279-S3 +extern S3 *q3; // #cwg279-q3 +} // export + +//--- cwg279.cpp +import cwg279_A; + +// FIXME: We should use markers instead. They are less fragile, +// but -verify doesn't support them across modules yet. +// FIXME: This is well-formed. Previous "definition" is actually just a declaration. 
+typedef struct {} S; +// since-cxx20-error@-1 {{typedef redefinition with different types ('struct S' vs 'S')}} +// since-cxx20-note@cwg279_A.cppm:17 {{previous definition is here}} +extern S *q; +// since-cxx20-error@-1 {{declaration of 'q' in the global module follows declaration in module cwg279_A}} +// since-cxx20-note@cwg279_A.cppm:18 {{previous declaration is here}} + +typedef struct {} S2; +// since-cxx20-error@-1 {{typedef redefinition with different types ('struct S2' vs 'S2')}} +// since-cxx20-note@cwg279_A.cppm:20 {{previous definition is here}} +extern S2 *q2; +// since-cxx20-error@-1 {{declaration of 'q2' in the global module follows declaration in module cwg279_A}} +// since-cxx20-note@cwg279_A.cppm:21 {{previous declaration is here}} + +// FIXME: This is well-formed, because [basic.def.odr]/15 is satisfied. +struct S3 {}; +// since-cxx20-error@-1 {{redefinition of 'S3'}} +// since-cxx20-note@cwg279_A.cppm:23 {{previous definition is here}} +extern S3 *q3; +// since-cxx20-error@-1 {{declaration of 'q3' in the global module follows declaration in module cwg279_A}} +// since-cxx20-note@cwg279_A.cppm:24 {{previous declaration is here}} diff --git a/clang/test/CXX/drs/cwg2xx.cpp b/clang/test/CXX/drs/cwg2xx.cpp index 926cb19596026b4..ec37b420880e28a 100644 --- a/clang/test/CXX/drs/cwg2xx.cpp +++ b/clang/test/CXX/drs/cwg2xx.cpp @@ -1032,6 +1032,8 @@ namespace cwg277 { // cwg277: 3.1 static_assert(__enable_constant_folding(!intp()), ""); } +// cwg279 is in cwg279.cpp + namespace cwg280 { // cwg280: 2.9 typedef void f0(); typedef void f1(int); diff --git a/clang/test/CXX/drs/cwg3xx.cpp b/clang/test/CXX/drs/cwg3xx.cpp index f20054c3701b1ce..10c8d86ed16a0db 100644 --- a/clang/test/CXX/drs/cwg3xx.cpp +++ b/clang/test/CXX/drs/cwg3xx.cpp @@ -637,6 +637,8 @@ namespace cwg337 { // cwg337: yes struct B { virtual ~B() = 0; }; } +// cwg338: dup 1884 + namespace cwg339 { // cwg339: 2.8 template struct A { static const int value = I; }; diff --git 
a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index 6640ed477a241e5..186f7cc0ace5465 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -1721,7 +1721,7 @@

C++ defect report implementation status

279 CD6 Correspondence of "names for linkage purposes" - Unknown + No 280 @@ -2075,7 +2075,7 @@

C++ defect report implementation status

338 CD6 Enumerator name with linkage used as class name in other translation unit - Unknown + Duplicate of 1884 339 @@ -11131,7 +11131,7 @@

C++ defect report implementation status

1884 CD6 Unclear requirements for same-named external-linkage entities - Unknown + Partial 1885 From 40363d506db8ab3382339dc4463372dcfcf931b7 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 28 Oct 2024 14:50:40 +0000 Subject: [PATCH 177/425] [RISCV] Add cost model tests for fp rounding ops for bf16. NFC --- llvm/test/Analysis/CostModel/RISCV/fround.ll | 890 +++++++++++++------ 1 file changed, 598 insertions(+), 292 deletions(-) diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll index b4740f223eca3a7..c6826760a45bee6 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fround.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll @@ -1,30 +1,50 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfh,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ZVFH +; RUN: opt < %s -passes="print" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN define void @floor() { ; CHECK-LABEL: 'floor' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.floor.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %5 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call @llvm.floor.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call @llvm.floor.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.floor.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.floor.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.floor.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.floor.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.floor.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.floor.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.floor.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.floor.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.floor.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.floor.v2bf16(<2 x bfloat> undef) 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.floor.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.floor.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.floor.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.floor.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.floor.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.floor.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.floor.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.floor.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.floor.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.floor.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.floor.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 9 for instruction: %19 = call @llvm.floor.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call @llvm.floor.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.floor.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call @llvm.floor.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = call @llvm.floor.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call @llvm.floor.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %29 = call @llvm.floor.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call bfloat @llvm.floor.bf16(bfloat undef) + call <2 x bfloat> @llvm.floor.v2bf16(<2 x bfloat> undef) + call <4 x bfloat> @llvm.floor.v4bf16(<4 x bfloat> undef) + call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.floor.v16bf16(<16 x bfloat> undef) + call @llvm.floor.nvx1bf16( undef) + call @llvm.floor.nvx2bf16( undef) + call @llvm.floor.nvx4bf16( undef) + call @llvm.floor.nvx8bf16( undef) + call @llvm.floor.nvx16bf16( undef) call float @llvm.floor.f32(float undef) call <2 x float> @llvm.floor.v2f32(<2 x float> undef) call <4 x float> @llvm.floor.v4f32(<4 x float> undef) @@ 
-89,27 +109,47 @@ define void @floor_fp16() { define void @ceil() { ; CHECK-LABEL: 'ceil' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.ceil.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call @llvm.ceil.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call @llvm.ceil.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.ceil.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.ceil.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.ceil.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.ceil.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 
9 for instruction: %16 = call @llvm.ceil.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.ceil.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.ceil.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.ceil.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.ceil.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.ceil.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.ceil.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.ceil.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.ceil.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.ceil.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.ceil.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.ceil.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.ceil.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.ceil.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.ceil.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.ceil.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.ceil.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.ceil.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call @llvm.ceil.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.ceil.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call @llvm.ceil.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = call @llvm.ceil.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call @llvm.ceil.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %29 = call @llvm.ceil.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret 
void ; + call bfloat @llvm.ceil.bf16(bfloat undef) + call <2 x bfloat> @llvm.ceil.v2bf16(<2 x bfloat> undef) + call <4 x bfloat> @llvm.ceil.v4bf16(<4 x bfloat> undef) + call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.ceil.v16bf16(<16 x bfloat> undef) + call @llvm.ceil.nvx1bf16( undef) + call @llvm.ceil.nvx2bf16( undef) + call @llvm.ceil.nvx4bf16( undef) + call @llvm.ceil.nvx8bf16( undef) + call @llvm.ceil.nvx16bf16( undef) call float @llvm.ceil.f32(float undef) call <2 x float> @llvm.ceil.v2f32(<2 x float> undef) call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) @@ -174,27 +214,47 @@ define void @ceil_fp16() { define void @trunc() { ; CHECK-LABEL: 'trunc' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.trunc.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.trunc.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.trunc.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.trunc.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.trunc.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.trunc.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call @llvm.trunc.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double 
@llvm.trunc.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x double> @llvm.trunc.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.trunc.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.trunc.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.trunc.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call @llvm.trunc.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.trunc.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.trunc.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.trunc.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.trunc.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.trunc.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.trunc.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call 
@llvm.trunc.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.trunc.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.trunc.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.trunc.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x float> @llvm.trunc.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.trunc.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.trunc.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.trunc.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call @llvm.trunc.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call @llvm.trunc.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.trunc.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x double> @llvm.trunc.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.trunc.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %27 = call @llvm.trunc.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %28 = call @llvm.trunc.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %29 = call @llvm.trunc.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call bfloat @llvm.trunc.bf16(bfloat undef) + call <2 x bfloat> @llvm.trunc.v2bf16(<2 x bfloat> undef) + call <4 x bfloat> @llvm.trunc.v4bf16(<4 x bfloat> undef) + call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.trunc.v16bf16(<16 x bfloat> undef) + call @llvm.trunc.nvx1bf16( undef) + call @llvm.trunc.nvx2bf16( undef) + call @llvm.trunc.nvx4bf16( undef) + call @llvm.trunc.nvx8bf16( undef) + call @llvm.trunc.nvx16bf16( undef) call float @llvm.trunc.f32(float undef) call <2 x float> @llvm.trunc.v2f32(<2 x float> undef) call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) @@ -259,27 +319,47 @@ define void @trunc_fp16() { define void @rint() { ; CHECK-LABEL: 'rint' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.rint.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.rint.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <16 x float> 
@llvm.rint.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.rint.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.rint.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.rint.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.rint.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call @llvm.rint.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.rint.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x double> @llvm.rint.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.rint.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.rint.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.rint.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call @llvm.rint.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.rint.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.rint.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %3 = call <4 x bfloat> @llvm.rint.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.rint.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.rint.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.rint.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.rint.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.rint.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.rint.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.rint.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x float> @llvm.rint.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.rint.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.rint.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.rint.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call @llvm.rint.nxv8f32( 
undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call @llvm.rint.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.rint.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x double> @llvm.rint.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.rint.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %27 = call @llvm.rint.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %28 = call @llvm.rint.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %29 = call @llvm.rint.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call bfloat @llvm.rint.bf16(bfloat undef) + call <2 x bfloat> @llvm.rint.v2bf16(<2 x bfloat> undef) + call <4 x bfloat> @llvm.rint.v4bf16(<4 x bfloat> undef) + call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.rint.v16bf16(<16 x bfloat> undef) + call @llvm.rint.nvx1bf16( undef) + call @llvm.rint.nvx2bf16( undef) + call @llvm.rint.nvx4bf16( undef) + call @llvm.rint.nvx8bf16( undef) + call @llvm.rint.nvx16bf16( undef) call float @llvm.rint.f32(float undef) call <2 x float> @llvm.rint.v2f32(<2 x float> undef) call <4 x float> @llvm.rint.v4f32(<4 x float> undef) @@ -344,27 +424,47 @@ define void @rint_fp16() { define void @lrint() { ; CHECK-LABEL: 'lrint' 
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.lrint.i64.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %16 = call @llvm.lrint.nxv1i64.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.lrint.nxv2i64.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.lrint.nxv4i64.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.lrint.nxv8i64.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.lrint.i64.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.lrint.nxv1i64.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.lrint.nxv2i64.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.lrint.nxv4i64.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.lrint.nxv8i64.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %20 = call @llvm.lrint.nxv16i64.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call i64 @llvm.lrint.i64.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.lrint.nxv1i64.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.lrint.nxv2i64.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call 
@llvm.lrint.nxv4i64.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.lrint.nxv8i64.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call i64 @llvm.lrint.i64.bf16(bfloat undef) + call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef) + call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef) + call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef) + call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef) + call @llvm.lrint.nvx1i64.nvx1bf16( undef) + call @llvm.lrint.nvx2i64.nvx2bf16( undef) + call @llvm.lrint.nvx4i64.nvx4bf16( undef) + call @llvm.lrint.nvx8i64.nvx8bf16( undef) + call @llvm.lrint.nvx16i64.nvx16bf16( undef) call i64 @llvm.lrint.i64.f32(float undef) call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef) call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef) @@ -416,27 +516,47 @@ define void @lrint_fp16() { define void @llrint() { ; CHECK-LABEL: 'llrint' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %8 = call @llvm.llrint.nxv4i64.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.llrint.i64.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.llrint.nxv1i64.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.llrint.nxv2i64.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.llrint.nxv4i64.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.llrint.nxv8i64.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x 
bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.llrint.i64.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.llrint.nxv1i64.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.llrint.nxv2i64.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.llrint.nxv4i64.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.llrint.nxv8i64.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %20 = call @llvm.llrint.nxv16i64.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call i64 @llvm.llrint.i64.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.llrint.nxv1i64.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.llrint.nxv2i64.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.llrint.nxv4i64.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.llrint.nxv8i64.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call i64 @llvm.llrint.i64.bf16(bfloat undef) + call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef) + call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef) + call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef) + call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef) + call @llvm.llrint.nvx1i64.nvx1bf16( undef) + call @llvm.llrint.nvx2i64.nvx2bf16( undef) + call @llvm.llrint.nvx4i64.nvx4bf16( undef) + call @llvm.llrint.nvx8i64.nvx8bf16( undef) + call @llvm.llrint.nvx16i64.nvx16bf16( undef) call i64 @llvm.llrint.i64.f32(float undef) call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef) call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> 
undef) @@ -488,27 +608,47 @@ define void @llrint_fp16() { define void @nearbyint() { ; CHECK-LABEL: 'nearbyint' -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.nearbyint.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call @llvm.nearbyint.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call @llvm.nearbyint.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.nearbyint.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.nearbyint.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.nearbyint.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.nearbyint.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> 
@llvm.nearbyint.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.nearbyint.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.nearbyint.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.nearbyint.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.nearbyint.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call bfloat @llvm.nearbyint.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.nearbyint.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.nearbyint.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.nearbyint.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.nearbyint.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.nearbyint.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.nearbyint.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.nearbyint.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.nearbyint.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call float @llvm.nearbyint.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> 
@llvm.nearbyint.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.nearbyint.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.nearbyint.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.nearbyint.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.nearbyint.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call @llvm.nearbyint.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %21 = call double @llvm.nearbyint.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call @llvm.nearbyint.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = call @llvm.nearbyint.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 9 for instruction: %28 = call @llvm.nearbyint.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %29 = call @llvm.nearbyint.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call bfloat @llvm.nearbyint.bf16(bfloat undef) + call <2 x bfloat> @llvm.nearbyint.v2bf16(<2 x bfloat> undef) + call <4 x bfloat> @llvm.nearbyint.v4bf16(<4 x bfloat> undef) + call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.nearbyint.v16bf16(<16 x bfloat> undef) + call @llvm.nearbyint.nvx1bf16( undef) + call @llvm.nearbyint.nvx2bf16( undef) + call @llvm.nearbyint.nvx4bf16( undef) + call @llvm.nearbyint.nvx8bf16( undef) + call @llvm.nearbyint.nvx16bf16( undef) call float @llvm.nearbyint.f32(float undef) call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef) call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) @@ -573,27 +713,47 @@ define void @nearbyint_fp16() { define void @round() { ; CHECK-LABEL: 'round' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.round.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.round.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.round.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.round.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.round.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call @llvm.round.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call @llvm.round.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call 
@llvm.round.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.round.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.round.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.round.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.round.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.round.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.round.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.round.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.round.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.round.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.round.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.round.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.round.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.round.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.round.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> 
@llvm.round.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.round.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.round.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.round.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.round.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.round.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.round.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.round.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.round.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.round.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.round.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.round.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.round.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.round.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.round.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call @llvm.round.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.round.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for 
instruction: %22 = call <2 x double> @llvm.round.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.round.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.round.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.round.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call @llvm.round.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = call @llvm.round.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call @llvm.round.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %29 = call @llvm.round.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call bfloat @llvm.round.bf16(bfloat undef) + call <2 x bfloat> @llvm.round.v2bf16(<2 x bfloat> undef) + call <4 x bfloat> @llvm.round.v4bf16(<4 x bfloat> undef) + call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.round.v16bf16(<16 x bfloat> undef) + call @llvm.round.nvx1bf16( undef) + call @llvm.round.nvx2bf16( undef) + call @llvm.round.nvx4bf16( undef) + call @llvm.round.nvx8bf16( undef) + call @llvm.round.nvx16bf16( undef) call float @llvm.round.f32(float undef) call <2 x float> @llvm.round.v2f32(<2 x float> undef) call <4 x float> @llvm.round.v4f32(<4 x float> undef) @@ -658,27 +818,47 @@ define void @round_fp16() { define void @roundeven() { ; CHECK-LABEL: 'roundeven' -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.roundeven.f32(float undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef) -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call @llvm.roundeven.nxv1f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call @llvm.roundeven.nxv2f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call @llvm.roundeven.nxv4f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call @llvm.roundeven.nxv8f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %10 = call @llvm.roundeven.nxv16f32( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.roundeven.f64(double undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.roundeven.v16f64(<16 x double> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.roundeven.nxv1f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.roundeven.nxv2f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call 
@llvm.roundeven.nxv4f64( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.roundeven.nxv8f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.roundeven.bf16(bfloat undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.roundeven.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.roundeven.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.roundeven.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.roundeven.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.roundeven.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.roundeven.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.roundeven.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.roundeven.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.roundeven.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.roundeven.f32(float undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost 
of 9 for instruction: %15 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call @llvm.roundeven.nxv1f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call @llvm.roundeven.nxv2f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call @llvm.roundeven.nxv4f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %19 = call @llvm.roundeven.nxv8f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %20 = call @llvm.roundeven.nxv16f32( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.roundeven.f64(double undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.roundeven.v16f64(<16 x double> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %26 = call @llvm.roundeven.nxv1f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %27 = call @llvm.roundeven.nxv2f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %28 = call @llvm.roundeven.nxv4f64( undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %29 = call @llvm.roundeven.nxv8f64( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call bfloat @llvm.roundeven.bf16(bfloat undef) + call <2 x bfloat> @llvm.roundeven.v2bf16(<2 x 
bfloat> undef) + call <4 x bfloat> @llvm.roundeven.v4bf16(<4 x bfloat> undef) + call <8 x bfloat> @llvm.roundeven.v8bf16(<8 x bfloat> undef) + call <16 x bfloat> @llvm.roundeven.v16bf16(<16 x bfloat> undef) + call @llvm.roundeven.nvx1bf16( undef) + call @llvm.roundeven.nvx2bf16( undef) + call @llvm.roundeven.nvx4bf16( undef) + call @llvm.roundeven.nvx8bf16( undef) + call @llvm.roundeven.nvx16bf16( undef) call float @llvm.roundeven.f32(float undef) call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef) call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef) @@ -743,25 +923,43 @@ define void @roundeven_fp16() { define void @vp_ceil() { ; CHECK-LABEL: 'vp_ceil' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call @llvm.vp.ceil.nxv1f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.vp.ceil.nxv2f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.vp.ceil.nxv4f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.vp.ceil.nxv8f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.vp.ceil.nxv16f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost 
Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.ceil.nxv1f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.ceil.nxv2f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.ceil.nxv4f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.ceil.nxv8f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.ceil.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.ceil.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.ceil.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.ceil.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.vp.ceil.nxv1bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %6 = call @llvm.vp.ceil.nxv2bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.vp.ceil.nxv4bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.vp.ceil.nxv8bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.vp.ceil.nxv16bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.ceil.nxv1f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.ceil.nxv2f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.ceil.nxv4f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.ceil.nxv8f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.vp.ceil.nxv16f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: 
Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call @llvm.vp.ceil.nxv1f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call @llvm.vp.ceil.nxv2f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call @llvm.vp.ceil.nxv4f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.vp.ceil.nxv8f64( undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call <2 x bfloat> @llvm.vp.ceil.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.vp.ceil.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.vp.ceil.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.vp.ceil.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) + call @llvm.vp.ceil.nvx1bf16( undef, undef, i32 undef) + call @llvm.vp.ceil.nvx2bf16( undef, undef, i32 undef) + call @llvm.vp.ceil.nvx4bf16( undef, undef, i32 undef) + call @llvm.vp.ceil.nvx8bf16( undef, undef, i32 undef) + call @llvm.vp.ceil.nvx16bf16( undef, undef, i32 undef) call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) @@ -821,25 +1019,43 @@ define 
void @vp_ceil_f16() { define void @vp_floor() { ; CHECK-LABEL: 'vp_floor' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call @llvm.vp.floor.nxv1f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.vp.floor.nxv2f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.vp.floor.nxv4f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.vp.floor.nxv8f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.vp.floor.nxv16f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> 
@llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.floor.nxv1f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.floor.nxv2f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.floor.nxv4f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.floor.nxv8f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.floor.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.floor.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.floor.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.floor.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.vp.floor.nxv1bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.vp.floor.nxv2bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.vp.floor.nxv4bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.vp.floor.nxv8bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.vp.floor.nxv16bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for 
instruction: %10 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.floor.nxv1f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.floor.nxv2f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.floor.nxv4f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.floor.nxv8f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.vp.floor.nxv16f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%23 = call @llvm.vp.floor.nxv1f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call @llvm.vp.floor.nxv2f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call @llvm.vp.floor.nxv4f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.vp.floor.nxv8f64( undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call <2 x bfloat> @llvm.vp.floor.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.vp.floor.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.vp.floor.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.vp.floor.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) + call @llvm.vp.floor.nvx1bf16( undef, undef, i32 undef) + call @llvm.vp.floor.nvx2bf16( undef, undef, i32 undef) + call @llvm.vp.floor.nvx4bf16( undef, undef, i32 undef) + call @llvm.vp.floor.nvx8bf16( undef, undef, i32 undef) + call @llvm.vp.floor.nvx16bf16( undef, undef, i32 undef) call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) @@ -899,25 +1115,43 @@ define void @vp_floor_f16() { define void @vp_round() { ; CHECK-LABEL: 'vp_round' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.round.v8f32(<8 x float> 
undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call @llvm.vp.round.nxv1f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.vp.round.nxv2f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.vp.round.nxv4f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.vp.round.nxv8f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.vp.round.nxv16f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.round.nxv1f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.round.nxv2f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.round.nxv4f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%17 = call @llvm.vp.round.nxv8f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.round.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.round.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.round.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.round.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.vp.round.nxv1bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.vp.round.nxv2bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.vp.round.nxv4bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.vp.round.nxv8bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.vp.round.nxv16bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> 
@llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.round.nxv1f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.round.nxv2f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.round.nxv4f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.round.nxv8f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.vp.round.nxv16f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call @llvm.vp.round.nxv1f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call @llvm.vp.round.nxv2f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call @llvm.vp.round.nxv4f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.vp.round.nxv8f64( undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret void ; + call <2 x bfloat> @llvm.vp.round.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.vp.round.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.vp.round.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.vp.round.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) + call @llvm.vp.round.nvx1bf16( undef, undef, i32 undef) + call @llvm.vp.round.nvx2bf16( undef, undef, i32 undef) + call @llvm.vp.round.nvx4bf16( undef, undef, i32 undef) + call @llvm.vp.round.nvx8bf16( undef, undef, i32 undef) + call @llvm.vp.round.nvx16bf16( undef, undef, i32 undef) call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) @@ -977,25 +1211,43 @@ define void @vp_round_f16() { define void @vp_roundeven() { ; CHECK-LABEL: 'vp_roundeven' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call @llvm.vp.roundeven.nxv1f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.vp.roundeven.nxv2f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost 
Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.vp.roundeven.nxv4f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.vp.roundeven.nxv8f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.vp.roundeven.nxv16f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.roundeven.nxv1f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.roundeven.nxv2f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.roundeven.nxv4f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.roundeven.nxv8f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.roundeven.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.roundeven.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.roundeven.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.roundeven.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.vp.roundeven.nxv1bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.vp.roundeven.nxv2bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.vp.roundeven.nxv4bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.vp.roundeven.nxv8bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.vp.roundeven.nxv16bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.roundeven.nxv1f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.roundeven.nxv2f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for 
instruction: %16 = call @llvm.vp.roundeven.nxv4f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.roundeven.nxv8f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.vp.roundeven.nxv16f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call @llvm.vp.roundeven.nxv1f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call @llvm.vp.roundeven.nxv2f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call @llvm.vp.roundeven.nxv4f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.vp.roundeven.nxv8f64( undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call <2 x bfloat> @llvm.vp.roundeven.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.vp.roundeven.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.vp.roundeven.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.vp.roundeven.v16bf16(<16 x 
bfloat> undef, <16 x i1> undef, i32 undef) + call @llvm.vp.roundeven.nvx1bf16( undef, undef, i32 undef) + call @llvm.vp.roundeven.nvx2bf16( undef, undef, i32 undef) + call @llvm.vp.roundeven.nvx4bf16( undef, undef, i32 undef) + call @llvm.vp.roundeven.nvx8bf16( undef, undef, i32 undef) + call @llvm.vp.roundeven.nvx16bf16( undef, undef, i32 undef) call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) @@ -1055,25 +1307,43 @@ define void @vp_roundeven_f16() { define void @vp_roundtozero() { ; CHECK-LABEL: 'vp_roundtozero' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call @llvm.vp.roundtozero.nxv1f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.vp.roundtozero.nxv2f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.vp.roundtozero.nxv4f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.vp.roundtozero.nxv8f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an 
estimated cost of 7 for instruction: %9 = call @llvm.vp.roundtozero.nxv16f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.roundtozero.nxv1f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.roundtozero.nxv2f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.roundtozero.nxv4f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.roundtozero.nxv8f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.roundtozero.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.roundtozero.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.roundtozero.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.roundtozero.v16bf16(<16 x bfloat> 
undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.vp.roundtozero.nxv1bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.vp.roundtozero.nxv2bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.vp.roundtozero.nxv4bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.vp.roundtozero.nxv8bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.vp.roundtozero.nxv16bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.roundtozero.nxv1f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.roundtozero.nxv2f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.roundtozero.nxv4f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.roundtozero.nxv8f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found 
an estimated cost of 7 for instruction: %18 = call @llvm.vp.roundtozero.nxv16f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call @llvm.vp.roundtozero.nxv1f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call @llvm.vp.roundtozero.nxv2f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call @llvm.vp.roundtozero.nxv4f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.vp.roundtozero.nxv8f64( undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call <2 x bfloat> @llvm.vp.roundtozero.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.vp.roundtozero.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.vp.roundtozero.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.vp.roundtozero.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) + call @llvm.vp.roundtozero.nvx1bf16( undef, undef, i32 undef) + call @llvm.vp.roundtozero.nvx2bf16( undef, undef, i32 undef) + call @llvm.vp.roundtozero.nvx4bf16( undef, undef, i32 
undef) + call @llvm.vp.roundtozero.nvx8bf16( undef, undef, i32 undef) + call @llvm.vp.roundtozero.nvx16bf16( undef, undef, i32 undef) call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) @@ -1133,25 +1403,43 @@ define void @vp_roundtozero_f16() { define void @vp_rint() { ; CHECK-LABEL: 'vp_rint' -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %3 = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %4 = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %5 = call @llvm.vp.rint.nxv1f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %6 = call @llvm.vp.rint.nxv2f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %7 = call @llvm.vp.rint.nxv4f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %8 = call @llvm.vp.rint.nxv8f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %9 = call @llvm.vp.rint.nxv16f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %10 = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 5 for instruction: %11 = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %14 = call @llvm.vp.rint.nxv1f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = call @llvm.vp.rint.nxv2f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = call @llvm.vp.rint.nxv4f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = call @llvm.vp.rint.nxv8f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.rint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.rint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.rint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.rint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.vp.rint.nxv1bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.vp.rint.nxv2bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %7 = call @llvm.vp.rint.nxv4bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.vp.rint.nxv8bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.vp.rint.nxv16bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %10 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %11 = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %12 = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %13 = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %14 = call @llvm.vp.rint.nxv1f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %15 = call @llvm.vp.rint.nxv2f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %16 = call @llvm.vp.rint.nxv4f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %17 = call @llvm.vp.rint.nxv8f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %18 = call @llvm.vp.rint.nxv16f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %19 = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %20 = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 5 for instruction: %21 = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %22 = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %23 = call @llvm.vp.rint.nxv1f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %24 = call @llvm.vp.rint.nxv2f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %25 = call @llvm.vp.rint.nxv4f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %26 = call @llvm.vp.rint.nxv8f64( undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call <2 x bfloat> @llvm.vp.rint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.vp.rint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.vp.rint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.vp.rint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) + call @llvm.vp.rint.nvx1bf16( undef, undef, i32 undef) + call @llvm.vp.rint.nvx2bf16( undef, undef, i32 undef) + call @llvm.vp.rint.nvx4bf16( undef, undef, i32 undef) + call @llvm.vp.rint.nvx8bf16( undef, undef, i32 undef) + call @llvm.vp.rint.nvx16bf16( undef, undef, i32 undef) call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) @@ -1211,25 +1499,43 @@ define void @vp_rint_f16() { define void @vp_nearbyint() { ; CHECK-LABEL: 'vp_nearbyint' -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = 
call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call @llvm.vp.nearbyint.nxv1f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call @llvm.vp.nearbyint.nxv2f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call @llvm.vp.nearbyint.nxv4f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call @llvm.vp.nearbyint.nxv8f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call @llvm.vp.nearbyint.nxv16f32( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 7 for instruction: %14 = call @llvm.vp.nearbyint.nxv1f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.nearbyint.nxv2f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.nearbyint.nxv4f64( undef, undef, i32 undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.nearbyint.nxv8f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.nearbyint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.nearbyint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.nearbyint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.nearbyint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.vp.nearbyint.nxv1bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.vp.nearbyint.nxv2bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.vp.nearbyint.nxv4bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.vp.nearbyint.nxv8bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.vp.nearbyint.nxv16bf16( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x 
float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call @llvm.vp.nearbyint.nxv1f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call @llvm.vp.nearbyint.nxv2f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call @llvm.vp.nearbyint.nxv4f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call @llvm.vp.nearbyint.nxv8f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %18 = call @llvm.vp.nearbyint.nxv16f32( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %23 = call 
@llvm.vp.nearbyint.nxv1f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %24 = call @llvm.vp.nearbyint.nxv2f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %25 = call @llvm.vp.nearbyint.nxv4f64( undef, undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %26 = call @llvm.vp.nearbyint.nxv8f64( undef, undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + call <2 x bfloat> @llvm.vp.nearbyint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef) + call <4 x bfloat> @llvm.vp.nearbyint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef) + call <8 x bfloat> @llvm.vp.nearbyint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef) + call <16 x bfloat> @llvm.vp.nearbyint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef) + call @llvm.vp.nearbyint.nvx1bf16( undef, undef, i32 undef) + call @llvm.vp.nearbyint.nvx2bf16( undef, undef, i32 undef) + call @llvm.vp.nearbyint.nvx4bf16( undef, undef, i32 undef) + call @llvm.vp.nearbyint.nvx8bf16( undef, undef, i32 undef) + call @llvm.vp.nearbyint.nvx16bf16( undef, undef, i32 undef) call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) From c9f01f699cc55929b18befc0fa34d70630fc9074 Mon Sep 17 00:00:00 2001 From: Sushant Gokhale Date: Mon, 28 Oct 2024 20:37:41 +0530 Subject: [PATCH 178/425] [SLP][AArch64][NFC] Add more tests for SLP vectorization of div (#113876) Currently, we dont have much tests that show SLP outcome for integer divisions. This patch adds tests for same. In certain scenarios, for Neon, vectorization is profitable. An attempt would be made in future to improve the cost-model for the same. 
--- .../Transforms/SLPVectorizer/AArch64/div.ll | 553 ++++++++++++++++++ 1 file changed, 553 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/AArch64/div.ll diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll new file mode 100644 index 000000000000000..e972955e26cb475 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=aarch64 -passes=slp-vectorizer -mattr=-sve -S < %s | FileCheck %s --check-prefixes=CHECK,NO-SVE +; RUN: opt -mtriple=aarch64 -passes=slp-vectorizer -mattr=+sve -S < %s | FileCheck %s --check-prefixes=CHECK,SVE + +define <2 x i8> @slp_v2i8_Op1_Op2_unknown(<2 x i8> %a, <2 x i8> %b) +; NO-SVE-LABEL: define <2 x i8> @slp_v2i8_Op1_Op2_unknown( +; NO-SVE-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i8> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i8> [[A]], i32 1 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <2 x i8> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <2 x i8> [[B]], i32 1 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <2 x i8> [[R0]], i8 [[TMP2]], i32 1 +; NO-SVE-NEXT: ret <2 x i8> [[R1]] +; +; SVE-LABEL: define <2 x i8> @slp_v2i8_Op1_Op2_unknown( +; SVE-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i8> [[A]], [[B]] +; SVE-NEXT: ret <2 x i8> [[TMP1]] +; +{ + %a0 = extractelement <2 x i8> %a, i32 0 + %a1 = extractelement <2 x i8> %a, i32 1 + %b0 = extractelement <2 x i8> %b, i32 0 + %b1 = extractelement <2 x i8> %b, i32 1 + %1 = sdiv i8 %a0, %b0 + %2 = sdiv i8 %a1, %b1 + %r0 = 
insertelement <2 x i8> poison, i8 %1, i32 0 + %r1 = insertelement <2 x i8> %r0, i8 %2, i32 1 + ret <2 x i8> %r1 +} + +define <2 x i16> @slp_v2i16_Op1_Op2_unknown(<2 x i16> %a, <2 x i16> %b) +; NO-SVE-LABEL: define <2 x i16> @slp_v2i16_Op1_Op2_unknown( +; NO-SVE-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i16> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i16> [[A]], i32 1 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <2 x i16> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <2 x i16> [[B]], i32 1 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <2 x i16> [[R0]], i16 [[TMP2]], i32 1 +; NO-SVE-NEXT: ret <2 x i16> [[R1]] +; +; SVE-LABEL: define <2 x i16> @slp_v2i16_Op1_Op2_unknown( +; SVE-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[A]], [[B]] +; SVE-NEXT: ret <2 x i16> [[TMP1]] +; +{ + %a0 = extractelement <2 x i16> %a, i32 0 + %a1 = extractelement <2 x i16> %a, i32 1 + %b0 = extractelement <2 x i16> %b, i32 0 + %b1 = extractelement <2 x i16> %b, i32 1 + %1 = sdiv i16 %a0, %b0 + %2 = sdiv i16 %a1, %b1 + %r0 = insertelement <2 x i16> poison, i16 %1, i32 0 + %r1 = insertelement <2 x i16> %r0, i16 %2, i32 1 + ret <2 x i16> %r1 +} + +define <2 x i32> @slp_v2i32_Op1_Op2_unknown(<2 x i32> %a, <2 x i32> %b) +; NO-SVE-LABEL: define <2 x i32> @slp_v2i32_Op1_Op2_unknown( +; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i32> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i32> [[A]], i32 1 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <2 x i32> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <2 x i32> [[B]], i32 1 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 
[[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], [[B1]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <2 x i32> [[R0]], i32 [[TMP2]], i32 1 +; NO-SVE-NEXT: ret <2 x i32> [[R1]] +; +; SVE-LABEL: define <2 x i32> @slp_v2i32_Op1_Op2_unknown( +; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[A]], [[B]] +; SVE-NEXT: ret <2 x i32> [[TMP1]] +; +{ + %a0 = extractelement <2 x i32> %a, i32 0 + %a1 = extractelement <2 x i32> %a, i32 1 + %b0 = extractelement <2 x i32> %b, i32 0 + %b1 = extractelement <2 x i32> %b, i32 1 + %1 = sdiv i32 %a0, %b0 + %2 = sdiv i32 %a1, %b1 + %r0 = insertelement <2 x i32> poison, i32 %1, i32 0 + %r1 = insertelement <2 x i32> %r0, i32 %2, i32 1 + ret <2 x i32> %r1 +} + +define <2 x i64> @slp_v2i64_Op1_Op2_unknown(<2 x i64> %a, <2 x i64> %b) +; NO-SVE-LABEL: define <2 x i64> @slp_v2i64_Op1_Op2_unknown( +; NO-SVE-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <2 x i64> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <2 x i64> [[A]], i32 1 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <2 x i64> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <2 x i64> [[B]], i32 1 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i64 [[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i64 [[A1]], [[B1]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <2 x i64> [[R0]], i64 [[TMP2]], i32 1 +; NO-SVE-NEXT: ret <2 x i64> [[R1]] +; +; SVE-LABEL: define <2 x i64> @slp_v2i64_Op1_Op2_unknown( +; SVE-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <2 x i64> [[A]], [[B]] +; SVE-NEXT: ret <2 x i64> [[TMP1]] +; +{ + %a0 = extractelement <2 x i64> %a, i32 0 + %a1 = extractelement <2 x i64> %a, i32 1 + %b0 = extractelement <2 x i64> %b, i32 0 + %b1 = 
extractelement <2 x i64> %b, i32 1 + %1 = sdiv i64 %a0, %b0 + %2 = sdiv i64 %a1, %b1 + %r0 = insertelement <2 x i64> poison, i64 %1, i32 0 + %r1 = insertelement <2 x i64> %r0, i64 %2, i32 1 + ret <2 x i64> %r1 +} + +define <4 x i8> @slp_v4i8_Op1_Op2_unknown(<4 x i8> %a, <4 x i8> %b) +; NO-SVE-LABEL: define <4 x i8> @slp_v4i8_Op1_Op2_unknown( +; NO-SVE-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i8> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i8> [[A]], i32 1 +; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i8> [[A]], i32 2 +; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i8> [[A]], i32 3 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <4 x i8> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <4 x i8> [[B]], i32 1 +; NO-SVE-NEXT: [[B2:%.*]] = extractelement <4 x i8> [[B]], i32 2 +; NO-SVE-NEXT: [[B3:%.*]] = extractelement <4 x i8> [[B]], i32 3 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]] +; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i8 [[A2]], [[B2]] +; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i8 [[A3]], [[B3]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i8> [[R0]], i8 [[TMP2]], i32 1 +; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i8> [[R1]], i8 [[TMP3]], i32 2 +; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i8> [[R2]], i8 [[TMP4]], i32 3 +; NO-SVE-NEXT: ret <4 x i8> [[R3]] +; +; SVE-LABEL: define <4 x i8> @slp_v4i8_Op1_Op2_unknown( +; SVE-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i8> [[A]], [[B]] +; SVE-NEXT: ret <4 x i8> [[TMP1]] +; +{ + %a0 = extractelement <4 x i8> %a, i32 0 + %a1 = extractelement <4 x i8> %a, i32 1 + %a2 = extractelement <4 x i8> %a, i32 2 + %a3 = extractelement <4 x i8> %a, i32 3 + %b0 = extractelement <4 x i8> %b, i32 0 + %b1 = extractelement <4 x i8> %b, i32 1 + %b2 = 
extractelement <4 x i8> %b, i32 2 + %b3 = extractelement <4 x i8> %b, i32 3 + %1 = sdiv i8 %a0, %b0 + %2 = sdiv i8 %a1, %b1 + %3 = sdiv i8 %a2, %b2 + %4 = sdiv i8 %a3, %b3 + %r0 = insertelement <4 x i8> poison, i8 %1, i32 0 + %r1 = insertelement <4 x i8> %r0, i8 %2, i32 1 + %r2 = insertelement <4 x i8> %r1, i8 %3, i32 2 + %r3 = insertelement <4 x i8> %r2, i8 %4, i32 3 + ret <4 x i8> %r3 +} + +define <4 x i16> @slp_v4i16_Op1_Op2_unknown(<4 x i16> %a, <4 x i16> %b) +; NO-SVE-LABEL: define <4 x i16> @slp_v4i16_Op1_Op2_unknown( +; NO-SVE-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i16> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i16> [[A]], i32 1 +; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i16> [[A]], i32 2 +; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i16> [[A]], i32 3 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <4 x i16> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <4 x i16> [[B]], i32 1 +; NO-SVE-NEXT: [[B2:%.*]] = extractelement <4 x i16> [[B]], i32 2 +; NO-SVE-NEXT: [[B3:%.*]] = extractelement <4 x i16> [[B]], i32 3 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]] +; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i16 [[A2]], [[B2]] +; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i16 [[A3]], [[B3]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i16> [[R0]], i16 [[TMP2]], i32 1 +; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i16> [[R1]], i16 [[TMP3]], i32 2 +; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i16> [[R2]], i16 [[TMP4]], i32 3 +; NO-SVE-NEXT: ret <4 x i16> [[R3]] +; +; SVE-LABEL: define <4 x i16> @slp_v4i16_Op1_Op2_unknown( +; SVE-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i16> [[A]], [[B]] +; SVE-NEXT: ret <4 x i16> [[TMP1]] +; +{ + %a0 = extractelement <4 x i16> %a, i32 0 + %a1 = 
extractelement <4 x i16> %a, i32 1 + %a2 = extractelement <4 x i16> %a, i32 2 + %a3 = extractelement <4 x i16> %a, i32 3 + %b0 = extractelement <4 x i16> %b, i32 0 + %b1 = extractelement <4 x i16> %b, i32 1 + %b2 = extractelement <4 x i16> %b, i32 2 + %b3 = extractelement <4 x i16> %b, i32 3 + %1 = sdiv i16 %a0, %b0 + %2 = sdiv i16 %a1, %b1 + %3 = sdiv i16 %a2, %b2 + %4 = sdiv i16 %a3, %b3 + %r0 = insertelement <4 x i16> poison, i16 %1, i32 0 + %r1 = insertelement <4 x i16> %r0, i16 %2, i32 1 + %r2 = insertelement <4 x i16> %r1, i16 %3, i32 2 + %r3 = insertelement <4 x i16> %r2, i16 %4, i32 3 + ret <4 x i16> %r3 +} + +define <4 x i32> @slp_v4i32_Op1_Op2_unknown(<4 x i32> %a, <4 x i32> %b) +; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_Op2_unknown( +; NO-SVE-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 +; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 +; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <4 x i32> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1 +; NO-SVE-NEXT: [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2 +; NO-SVE-NEXT: [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], [[B1]] +; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i32 [[A2]], [[B2]] +; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i32 [[A3]], [[B3]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1 +; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2 +; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3 +; NO-SVE-NEXT: ret <4 x i32> [[R3]] +; +; SVE-LABEL: define <4 x i32> 
@slp_v4i32_Op1_Op2_unknown( +; SVE-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], [[B]] +; SVE-NEXT: ret <4 x i32> [[TMP1]] +; +{ + %a0 = extractelement <4 x i32> %a, i32 0 + %a1 = extractelement <4 x i32> %a, i32 1 + %a2 = extractelement <4 x i32> %a, i32 2 + %a3 = extractelement <4 x i32> %a, i32 3 + %b0 = extractelement <4 x i32> %b, i32 0 + %b1 = extractelement <4 x i32> %b, i32 1 + %b2 = extractelement <4 x i32> %b, i32 2 + %b3 = extractelement <4 x i32> %b, i32 3 + %1 = sdiv i32 %a0, %b0 + %2 = sdiv i32 %a1, %b1 + %3 = sdiv i32 %a2, %b2 + %4 = sdiv i32 %a3, %b3 + %r0 = insertelement <4 x i32> poison, i32 %1, i32 0 + %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1 + %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2 + %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3 + ret <4 x i32> %r3 +} + +define <8 x i8> @slp_v8i8_Op1_Op2_unknown(<8 x i8> %a, <8 x i8> %b) +; NO-SVE-LABEL: define <8 x i8> @slp_v8i8_Op1_Op2_unknown( +; NO-SVE-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <8 x i8> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <8 x i8> [[A]], i32 1 +; NO-SVE-NEXT: [[A2:%.*]] = extractelement <8 x i8> [[A]], i32 2 +; NO-SVE-NEXT: [[A3:%.*]] = extractelement <8 x i8> [[A]], i32 3 +; NO-SVE-NEXT: [[A4:%.*]] = extractelement <8 x i8> [[A]], i32 4 +; NO-SVE-NEXT: [[A5:%.*]] = extractelement <8 x i8> [[A]], i32 5 +; NO-SVE-NEXT: [[A6:%.*]] = extractelement <8 x i8> [[A]], i32 6 +; NO-SVE-NEXT: [[A7:%.*]] = extractelement <8 x i8> [[A]], i32 7 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <8 x i8> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <8 x i8> [[B]], i32 1 +; NO-SVE-NEXT: [[B2:%.*]] = extractelement <8 x i8> [[B]], i32 2 +; NO-SVE-NEXT: [[B3:%.*]] = extractelement <8 x i8> [[B]], i32 3 +; NO-SVE-NEXT: [[B4:%.*]] = extractelement <8 x i8> [[B]], i32 4 +; NO-SVE-NEXT: [[B5:%.*]] = extractelement <8 x i8> [[B]], i32 5 +; 
NO-SVE-NEXT: [[B6:%.*]] = extractelement <8 x i8> [[B]], i32 6 +; NO-SVE-NEXT: [[B7:%.*]] = extractelement <8 x i8> [[B]], i32 7 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]] +; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i8 [[A2]], [[B2]] +; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i8 [[A3]], [[B3]] +; NO-SVE-NEXT: [[TMP5:%.*]] = sdiv i8 [[A4]], [[B4]] +; NO-SVE-NEXT: [[TMP6:%.*]] = sdiv i8 [[A5]], [[B5]] +; NO-SVE-NEXT: [[TMP7:%.*]] = sdiv i8 [[A6]], [[B6]] +; NO-SVE-NEXT: [[TMP8:%.*]] = sdiv i8 [[A7]], [[B7]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <8 x i8> [[R0]], i8 [[TMP2]], i32 1 +; NO-SVE-NEXT: [[R2:%.*]] = insertelement <8 x i8> [[R1]], i8 [[TMP3]], i32 2 +; NO-SVE-NEXT: [[R3:%.*]] = insertelement <8 x i8> [[R2]], i8 [[TMP4]], i32 3 +; NO-SVE-NEXT: [[R4:%.*]] = insertelement <8 x i8> [[R3]], i8 [[TMP5]], i32 4 +; NO-SVE-NEXT: [[R5:%.*]] = insertelement <8 x i8> [[R4]], i8 [[TMP6]], i32 5 +; NO-SVE-NEXT: [[R6:%.*]] = insertelement <8 x i8> [[R5]], i8 [[TMP7]], i32 6 +; NO-SVE-NEXT: [[R7:%.*]] = insertelement <8 x i8> [[R6]], i8 [[TMP8]], i32 7 +; NO-SVE-NEXT: ret <8 x i8> [[R3]] +; +; SVE-LABEL: define <8 x i8> @slp_v8i8_Op1_Op2_unknown( +; SVE-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> +; SVE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> +; SVE-NEXT: [[TMP3:%.*]] = sdiv <4 x i8> [[TMP1]], [[TMP2]] +; SVE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> +; SVE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> +; SVE-NEXT: [[TMP6:%.*]] = sdiv <4 x i8> [[TMP4]], [[TMP5]] +; SVE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <8 x i32> +; SVE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> +; 
SVE-NEXT: [[R71:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP8]], <8 x i32> +; SVE-NEXT: ret <8 x i8> [[TMP7]] +; +{ + %a0 = extractelement <8 x i8> %a, i32 0 + %a1 = extractelement <8 x i8> %a, i32 1 + %a2 = extractelement <8 x i8> %a, i32 2 + %a3 = extractelement <8 x i8> %a, i32 3 + %a4 = extractelement <8 x i8> %a, i32 4 + %a5 = extractelement <8 x i8> %a, i32 5 + %a6 = extractelement <8 x i8> %a, i32 6 + %a7 = extractelement <8 x i8> %a, i32 7 + %b0 = extractelement <8 x i8> %b, i32 0 + %b1 = extractelement <8 x i8> %b, i32 1 + %b2 = extractelement <8 x i8> %b, i32 2 + %b3 = extractelement <8 x i8> %b, i32 3 + %b4 = extractelement <8 x i8> %b, i32 4 + %b5 = extractelement <8 x i8> %b, i32 5 + %b6 = extractelement <8 x i8> %b, i32 6 + %b7 = extractelement <8 x i8> %b, i32 7 + %1 = sdiv i8 %a0, %b0 + %2 = sdiv i8 %a1, %b1 + %3 = sdiv i8 %a2, %b2 + %4 = sdiv i8 %a3, %b3 + %5 = sdiv i8 %a4, %b4 + %6 = sdiv i8 %a5, %b5 + %7 = sdiv i8 %a6, %b6 + %8 = sdiv i8 %a7, %b7 + %r0 = insertelement <8 x i8> poison, i8 %1, i32 0 + %r1 = insertelement <8 x i8> %r0, i8 %2, i32 1 + %r2 = insertelement <8 x i8> %r1, i8 %3, i32 2 + %r3 = insertelement <8 x i8> %r2, i8 %4, i32 3 + %r4 = insertelement <8 x i8> %r3, i8 %5, i32 4 + %r5 = insertelement <8 x i8> %r4, i8 %6, i32 5 + %r6 = insertelement <8 x i8> %r5, i8 %7, i32 6 + %r7 = insertelement <8 x i8> %r6, i8 %8, i32 7 + ret <8 x i8> %r3 +} + +define <8 x i16> @slp_v8i16_Op1_Op2_unknown(<8 x i16> %a, <8 x i16> %b) +; NO-SVE-LABEL: define <8 x i16> @slp_v8i16_Op1_Op2_unknown( +; NO-SVE-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <8 x i16> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <8 x i16> [[A]], i32 1 +; NO-SVE-NEXT: [[A2:%.*]] = extractelement <8 x i16> [[A]], i32 2 +; NO-SVE-NEXT: [[A3:%.*]] = extractelement <8 x i16> [[A]], i32 3 +; NO-SVE-NEXT: [[A4:%.*]] = extractelement <8 x i16> [[A]], i32 4 +; NO-SVE-NEXT: [[A5:%.*]] = extractelement <8 x 
i16> [[A]], i32 5 +; NO-SVE-NEXT: [[A6:%.*]] = extractelement <8 x i16> [[A]], i32 6 +; NO-SVE-NEXT: [[A7:%.*]] = extractelement <8 x i16> [[A]], i32 7 +; NO-SVE-NEXT: [[B0:%.*]] = extractelement <8 x i16> [[B]], i32 0 +; NO-SVE-NEXT: [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1 +; NO-SVE-NEXT: [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2 +; NO-SVE-NEXT: [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3 +; NO-SVE-NEXT: [[B4:%.*]] = extractelement <8 x i16> [[B]], i32 4 +; NO-SVE-NEXT: [[B5:%.*]] = extractelement <8 x i16> [[B]], i32 5 +; NO-SVE-NEXT: [[B6:%.*]] = extractelement <8 x i16> [[B]], i32 6 +; NO-SVE-NEXT: [[B7:%.*]] = extractelement <8 x i16> [[B]], i32 7 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]] +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]] +; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i16 [[A2]], [[B2]] +; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i16 [[A3]], [[B3]] +; NO-SVE-NEXT: [[TMP5:%.*]] = sdiv i16 [[A4]], [[B4]] +; NO-SVE-NEXT: [[TMP6:%.*]] = sdiv i16 [[A5]], [[B5]] +; NO-SVE-NEXT: [[TMP7:%.*]] = sdiv i16 [[A6]], [[B6]] +; NO-SVE-NEXT: [[TMP8:%.*]] = sdiv i16 [[A7]], [[B7]] +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <8 x i16> [[R0]], i16 [[TMP2]], i32 1 +; NO-SVE-NEXT: [[R2:%.*]] = insertelement <8 x i16> [[R1]], i16 [[TMP3]], i32 2 +; NO-SVE-NEXT: [[R3:%.*]] = insertelement <8 x i16> [[R2]], i16 [[TMP4]], i32 3 +; NO-SVE-NEXT: [[R4:%.*]] = insertelement <8 x i16> [[R3]], i16 [[TMP5]], i32 4 +; NO-SVE-NEXT: [[R5:%.*]] = insertelement <8 x i16> [[R4]], i16 [[TMP6]], i32 5 +; NO-SVE-NEXT: [[R6:%.*]] = insertelement <8 x i16> [[R5]], i16 [[TMP7]], i32 6 +; NO-SVE-NEXT: [[R7:%.*]] = insertelement <8 x i16> [[R6]], i16 [[TMP8]], i32 7 +; NO-SVE-NEXT: ret <8 x i16> [[R3]] +; +; SVE-LABEL: define <8 x i16> @slp_v8i16_Op1_Op2_unknown( +; SVE-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> 
[[A]], <8 x i16> poison, <4 x i32> +; SVE-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> +; SVE-NEXT: [[TMP3:%.*]] = sdiv <4 x i16> [[TMP1]], [[TMP2]] +; SVE-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> +; SVE-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> +; SVE-NEXT: [[TMP6:%.*]] = sdiv <4 x i16> [[TMP4]], [[TMP5]] +; SVE-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <8 x i32> +; SVE-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <8 x i32> +; SVE-NEXT: [[R71:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> [[TMP8]], <8 x i32> +; SVE-NEXT: ret <8 x i16> [[TMP7]] +; +{ + %a0 = extractelement <8 x i16> %a, i32 0 + %a1 = extractelement <8 x i16> %a, i32 1 + %a2 = extractelement <8 x i16> %a, i32 2 + %a3 = extractelement <8 x i16> %a, i32 3 + %a4 = extractelement <8 x i16> %a, i32 4 + %a5 = extractelement <8 x i16> %a, i32 5 + %a6 = extractelement <8 x i16> %a, i32 6 + %a7 = extractelement <8 x i16> %a, i32 7 + %b0 = extractelement <8 x i16> %b, i32 0 + %b1 = extractelement <8 x i16> %b, i32 1 + %b2 = extractelement <8 x i16> %b, i32 2 + %b3 = extractelement <8 x i16> %b, i32 3 + %b4 = extractelement <8 x i16> %b, i32 4 + %b5 = extractelement <8 x i16> %b, i32 5 + %b6 = extractelement <8 x i16> %b, i32 6 + %b7 = extractelement <8 x i16> %b, i32 7 + %1 = sdiv i16 %a0, %b0 + %2 = sdiv i16 %a1, %b1 + %3 = sdiv i16 %a2, %b2 + %4 = sdiv i16 %a3, %b3 + %5 = sdiv i16 %a4, %b4 + %6 = sdiv i16 %a5, %b5 + %7 = sdiv i16 %a6, %b6 + %8 = sdiv i16 %a7, %b7 + %r0 = insertelement <8 x i16> poison, i16 %1, i32 0 + %r1 = insertelement <8 x i16> %r0, i16 %2, i32 1 + %r2 = insertelement <8 x i16> %r1, i16 %3, i32 2 + %r3 = insertelement <8 x i16> %r2, i16 %4, i32 3 + %r4 = insertelement <8 x i16> %r3, i16 %5, i32 4 + %r5 = insertelement <8 x i16> %r4, i16 %6, i32 5 + %r6 = insertelement <8 x i16> %r5, i16 %7, i32 6 + %r7 = 
insertelement <8 x i16> %r6, i16 %8, i32 7 + ret <8 x i16> %r3 +} + +define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(<4 x i32> %a) +; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const( +; NO-SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 +; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 +; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], 1 +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 3 +; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i32 [[A2]], 5 +; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i32 [[A3]], 7 +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1 +; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2 +; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3 +; NO-SVE-NEXT: ret <4 x i32> [[R3]] +; +; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const( +; SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], +; SVE-NEXT: ret <4 x i32> [[TMP1]] +; +{ + %a0 = extractelement <4 x i32> %a, i32 0 + %a1 = extractelement <4 x i32> %a, i32 1 + %a2 = extractelement <4 x i32> %a, i32 2 + %a3 = extractelement <4 x i32> %a, i32 3 + %1 = sdiv i32 %a0, 1 + %2 = sdiv i32 %a1, 3 + %3 = sdiv i32 %a2, 5 + %4 = sdiv i32 %a3, 7 + %r0 = insertelement <4 x i32> poison, i32 %1, i32 0 + %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1 + %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2 + %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3 + ret <4 x i32> %r3 +} + +define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(<4 x i32> %a) +; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const( +; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; 
CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; +{ + %a0 = extractelement <4 x i32> %a, i32 0 + %a1 = extractelement <4 x i32> %a, i32 1 + %a2 = extractelement <4 x i32> %a, i32 2 + %a3 = extractelement <4 x i32> %a, i32 3 + %1 = sdiv i32 %a0, 5 + %2 = sdiv i32 %a1, 5 + %3 = sdiv i32 %a2, 5 + %4 = sdiv i32 %a3, 5 + %r0 = insertelement <4 x i32> poison, i32 %1, i32 0 + %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1 + %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2 + %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3 + ret <4 x i32> %r3 +} + +define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2(<4 x i32> %a) +; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2( +; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; +{ + %a0 = extractelement <4 x i32> %a, i32 0 + %a1 = extractelement <4 x i32> %a, i32 1 + %a2 = extractelement <4 x i32> %a, i32 2 + %a3 = extractelement <4 x i32> %a, i32 3 + %1 = sdiv i32 %a0, 4 + %2 = sdiv i32 %a1, 4 + %3 = sdiv i32 %a2, 4 + %4 = sdiv i32 %a3, 4 + %r0 = insertelement <4 x i32> poison, i32 %1, i32 0 + %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1 + %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2 + %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3 + ret <4 x i32> %r3 +} + +define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a) +; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2( +; NO-SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] { +; NO-SVE-NEXT: [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0 +; NO-SVE-NEXT: [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1 +; NO-SVE-NEXT: [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2 +; NO-SVE-NEXT: [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3 +; NO-SVE-NEXT: [[TMP1:%.*]] = sdiv i32 [[A0]], 1 +; NO-SVE-NEXT: [[TMP2:%.*]] = sdiv i32 [[A1]], 2 +; NO-SVE-NEXT: [[TMP3:%.*]] = sdiv i32 [[A2]], 4 
+; NO-SVE-NEXT: [[TMP4:%.*]] = sdiv i32 [[A3]], 8 +; NO-SVE-NEXT: [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0 +; NO-SVE-NEXT: [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1 +; NO-SVE-NEXT: [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2 +; NO-SVE-NEXT: [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3 +; NO-SVE-NEXT: ret <4 x i32> [[R3]] +; +; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2( +; SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] { +; SVE-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> [[A]], +; SVE-NEXT: ret <4 x i32> [[TMP1]] +; +{ + %a0 = extractelement <4 x i32> %a, i32 0 + %a1 = extractelement <4 x i32> %a, i32 1 + %a2 = extractelement <4 x i32> %a, i32 2 + %a3 = extractelement <4 x i32> %a, i32 3 + %1 = sdiv i32 %a0, 1 + %2 = sdiv i32 %a1, 2 + %3 = sdiv i32 %a2, 4 + %4 = sdiv i32 %a3, 8 + %r0 = insertelement <4 x i32> poison, i32 %1, i32 0 + %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1 + %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2 + %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3 + ret <4 x i32> %r3 +} From 3ac75ee8ecbe3bb1d1907adf48731bb6dc2c9918 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 28 Oct 2024 16:25:58 +0100 Subject: [PATCH 179/425] Revert "[runtimes] Allow building against an installed LLVM tree" (#113920) Reverts llvm/llvm-project#86209 This patch breaks running tests locally, which is extremely disruptive to libc++ development. 
--- compiler-rt/cmake/Modules/AddCompilerRT.cmake | 1 - compiler-rt/test/hwasan/lit.cfg.py | 9 ------ compiler-rt/test/lit.common.configured.in | 1 - runtimes/CMakeLists.txt | 29 +++++++------------ 4 files changed, 10 insertions(+), 30 deletions(-) diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake index b2f33d1a961c747..e3d81d241b10542 100644 --- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake +++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake @@ -773,7 +773,6 @@ function(configure_compiler_rt_lit_site_cfg input output) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_TEST_COMPILER ${COMPILER_RT_TEST_COMPILER}) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_OUTPUT_DIR ${COMPILER_RT_OUTPUT_DIR}) - string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR ${COMPILER_RT_EXEC_OUTPUT_DIR}) string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR ${output_dir}) configure_lit_site_cfg(${input} ${output}) diff --git a/compiler-rt/test/hwasan/lit.cfg.py b/compiler-rt/test/hwasan/lit.cfg.py index bbf23e683240ac4..594f3294a84ac17 100644 --- a/compiler-rt/test/hwasan/lit.cfg.py +++ b/compiler-rt/test/hwasan/lit.cfg.py @@ -2,9 +2,6 @@ import os -from lit.llvm import llvm_config -from lit.llvm.subst import ToolSubst, FindTool - # Setup config name. config.name = "HWAddressSanitizer" + getattr(config, "name_suffix", "default") @@ -77,12 +74,6 @@ def build_invocation(compile_flags): ("%env_hwasan_opts=", "env HWASAN_OPTIONS=" + default_hwasan_opts_str) ) -# Ensure that we can use hwasan_symbolize from the expected location -llvm_config.add_tool_substitutions( - [ToolSubst("hwasan_symbolize", unresolved="fatal")], - search_dirs=[config.compiler_rt_bindir], -) - # Default test suffixes. 
config.suffixes = [".c", ".cpp"] diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in index 050792b6b262175..66935c358afeddb 100644 --- a/compiler-rt/test/lit.common.configured.in +++ b/compiler-rt/test/lit.common.configured.in @@ -28,7 +28,6 @@ set_default("python_executable", "@Python3_EXECUTABLE@") set_default("compiler_rt_debug", @COMPILER_RT_DEBUG_PYBOOL@) set_default("compiler_rt_intercept_libdispatch", @COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL@) set_default("compiler_rt_output_dir", "@COMPILER_RT_RESOLVED_OUTPUT_DIR@") -set_default("compiler_rt_bindir", "@COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR@") set_default("compiler_rt_libdir", "@COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR@") set_default("emulator", "@COMPILER_RT_EMULATOR@") set_default("asan_shadow_scale", "@COMPILER_RT_ASAN_SHADOW_SCALE@") diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 67368dcedb3bf6d..830165c799c2ab9 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -239,23 +239,6 @@ foreach(entry ${runtimes}) endforeach() if(LLVM_INCLUDE_TESTS) - # If built with the runtimes build (rooted at runtimes/CMakeLists.txt), we - # won't have llvm-lit. If built with the bootstrapping build (rooted at - # llvm/CMakeLists.txt), the top-level llvm CMake invocation already generated - # the llvm-lit script. - if (NOT HAVE_LLVM_LIT) - # Add lit before adding any runtimes since their CMake tests configuration - # might depend on lit being present. - set(LLVM_LIT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin) - add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit - ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit) - # Ensure that the testsuites use the local lit rather than - # ${LLVM_INSTALL_DIR}/bin/llvm-lit (which may not exist if LLVM_BINARY_DIR - # points at an installed LLVM tree rather than a build tree). 
- get_llvm_lit_path(_base_dir _file_name) - set(LLVM_EXTERNAL_LIT "${_base_dir}/${_file_name}" CACHE STRING "Command used to spawn lit" FORCE) - endif() - set(LIT_ARGS_DEFAULT "-sv --show-xfail --show-unsupported") if (MSVC OR XCODE) set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") @@ -289,6 +272,14 @@ if(LLVM_INCLUDE_TESTS) # and we know the total set of lit testsuites. umbrella_lit_testsuite_end(check-runtimes) + if (NOT HAVE_LLVM_LIT) + # If built by manually invoking cmake on this directory, we don't have + # llvm-lit. If invoked via llvm/runtimes, the toplevel llvm cmake + # invocation already generated the llvm-lit script. + add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit + ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit) + endif() + get_property(LLVM_RUNTIMES_LIT_TESTSUITES GLOBAL PROPERTY LLVM_RUNTIMES_LIT_TESTSUITES) string(REPLACE ";" "\n" LLVM_RUNTIMES_LIT_TESTSUITES "${LLVM_RUNTIMES_LIT_TESTSUITES}") file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/lit.tests ${LLVM_RUNTIMES_LIT_TESTSUITES}) @@ -318,10 +309,10 @@ if(SUB_COMPONENTS) if(LLVM_RUNTIMES_TARGET) configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/runtimes/${LLVM_RUNTIMES_TARGET}/Components.cmake) + ${LLVM_BINARY_DIR}/runtimes/${LLVM_RUNTIMES_TARGET}/Components.cmake) else() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/runtimes/Components.cmake) + ${LLVM_BINARY_DIR}/runtimes/Components.cmake) endif() endif() From 7d1e98c7d34e2f4be1812a2f00f1d0464f29e542 Mon Sep 17 00:00:00 2001 From: Abhina Sree Date: Mon, 28 Oct 2024 11:34:57 -0400 Subject: [PATCH 180/425] [AIX][SystemZ][z/OS] Disable test for AIX, z/OS due to missing DWARF sections (#113910) This patch disables the testcase for AIX and z/OS due to incomplete DWARF support. 
--- llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll b/llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll index e88afe1b4c51189..2a6c89dcab5978b 100644 --- a/llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll +++ b/llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll @@ -1,3 +1,4 @@ +; XFAIL: target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}} ; RUN: %llc_dwarf -debugger-tune=lldb -accel-tables=Dwarf -filetype=obj -o %t < %s ; RUN: llvm-dwarfdump %t | FileCheck %s ; RUN: llvm-dwarfdump -debug-names %t | FileCheck --check-prefix=SAME-NAME %s From 53f7f8eccabd6e3383edfeec312bf8671a89bc66 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 28 Oct 2024 15:43:14 +0000 Subject: [PATCH 181/425] [Clang][AArch64] Fix Pure Scalables Types argument passing and return (#112747) Pure Scalable Types are defined in AAPCS64 here: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#pure-scalable-types-psts And should be passed according to Rule C.7 here: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#682parameter-passing-rules This part of the ABI is completely unimplemented in Clang, instead it treats PSTs sometimes as HFAs/HVAs, sometimes as general composite types. This patch implements the rules for passing PSTs by employing the `CoerceAndExpand` method and extending it to: * allow array types in the `coerceToType`; Now only `[N x i8]` are considered padding. * allow mismatch between the elements of the `coerceToType` and the elements of the `unpaddedCoerceToType`; AArch64 uses this to map fixed-length vector types to SVE vector types. Correctly passing a PST argument needs a decision in Clang about whether to pass it in memory or registers or, equivalently, whether to use the `Indirect` or `Expand/CoerceAndExpand` method. 
It was considered relatively harder (or not practically possible) to make that decision in the AArch64 backend. Hence this patch implements the register counting from AAPCS64 (cf. `NSRN`, `NPRN`) to guide Clang's decision. --- clang/include/clang/CodeGen/CGFunctionInfo.h | 16 +- clang/lib/CodeGen/CGCall.cpp | 85 +++- clang/lib/CodeGen/Targets/AArch64.cpp | 374 ++++++++++++-- .../aarch64-pure-scalable-args-empty-union.c | 39 ++ .../test/CodeGen/aarch64-pure-scalable-args.c | 461 ++++++++++++++++++ 5 files changed, 897 insertions(+), 78 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-pure-scalable-args-empty-union.c create mode 100644 clang/test/CodeGen/aarch64-pure-scalable-args.c diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h index d19f84d198876f5..9d785d878b61dcc 100644 --- a/clang/include/clang/CodeGen/CGFunctionInfo.h +++ b/clang/include/clang/CodeGen/CGFunctionInfo.h @@ -271,12 +271,8 @@ class ABIArgInfo { // in the unpadded type. 
unsigned unpaddedIndex = 0; for (auto eltType : coerceToType->elements()) { - if (isPaddingForCoerceAndExpand(eltType)) continue; - if (unpaddedStruct) { - assert(unpaddedStruct->getElementType(unpaddedIndex) == eltType); - } else { - assert(unpaddedIndex == 0 && unpaddedCoerceToType == eltType); - } + if (isPaddingForCoerceAndExpand(eltType)) + continue; unpaddedIndex++; } @@ -295,12 +291,8 @@ class ABIArgInfo { } static bool isPaddingForCoerceAndExpand(llvm::Type *eltType) { - if (eltType->isArrayTy()) { - assert(eltType->getArrayElementType()->isIntegerTy(8)); - return true; - } else { - return false; - } + return eltType->isArrayTy() && + eltType->getArrayElementType()->isIntegerTy(8); } Kind getKind() const { return TheKind; } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 1949b4ceb7f204c..64e60f0616d77b7 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1410,6 +1410,30 @@ static Address emitAddressAtOffset(CodeGenFunction &CGF, Address addr, return addr; } +static std::pair +CoerceScalableToFixed(CodeGenFunction &CGF, llvm::FixedVectorType *ToTy, + llvm::ScalableVectorType *FromTy, llvm::Value *V, + StringRef Name = "") { + // If we are casting a scalable i1 predicate vector to a fixed i8 + // vector, first bitcast the source. 
+ if (FromTy->getElementType()->isIntegerTy(1) && + FromTy->getElementCount().isKnownMultipleOf(8) && + ToTy->getElementType() == CGF.Builder.getInt8Ty()) { + FromTy = llvm::ScalableVectorType::get( + ToTy->getElementType(), + FromTy->getElementCount().getKnownMinValue() / 8); + V = CGF.Builder.CreateBitCast(V, FromTy); + } + if (FromTy->getElementType() == ToTy->getElementType()) { + llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty); + + V->setName(Name + ".coerce"); + V = CGF.Builder.CreateExtractVector(ToTy, V, Zero, "cast.fixed"); + return {V, true}; + } + return {V, false}; +} + namespace { /// Encapsulates information about the way function arguments from @@ -3196,26 +3220,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // a VLAT at the function boundary and the types match up, use // llvm.vector.extract to convert back to the original VLST. if (auto *VecTyTo = dyn_cast(ConvertType(Ty))) { - llvm::Value *Coerced = Fn->getArg(FirstIRArg); + llvm::Value *ArgVal = Fn->getArg(FirstIRArg); if (auto *VecTyFrom = - dyn_cast(Coerced->getType())) { - // If we are casting a scalable i1 predicate vector to a fixed i8 - // vector, bitcast the source and use a vector extract. 
- if (VecTyFrom->getElementType()->isIntegerTy(1) && - VecTyFrom->getElementCount().isKnownMultipleOf(8) && - VecTyTo->getElementType() == Builder.getInt8Ty()) { - VecTyFrom = llvm::ScalableVectorType::get( - VecTyTo->getElementType(), - VecTyFrom->getElementCount().getKnownMinValue() / 8); - Coerced = Builder.CreateBitCast(Coerced, VecTyFrom); - } - if (VecTyFrom->getElementType() == VecTyTo->getElementType()) { - llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty); - + dyn_cast(ArgVal->getType())) { + auto [Coerced, Extracted] = CoerceScalableToFixed( + *this, VecTyTo, VecTyFrom, ArgVal, Arg->getName()); + if (Extracted) { assert(NumIRArgs == 1); - Coerced->setName(Arg->getName() + ".coerce"); - ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector( - VecTyTo, Coerced, Zero, "cast.fixed"))); + ArgVals.push_back(ParamValue::forDirect(Coerced)); break; } } @@ -3326,16 +3338,33 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ArgVals.push_back(ParamValue::forIndirect(alloca)); auto coercionType = ArgI.getCoerceAndExpandType(); + auto unpaddedCoercionType = ArgI.getUnpaddedCoerceAndExpandType(); + auto *unpaddedStruct = dyn_cast(unpaddedCoercionType); + alloca = alloca.withElementType(coercionType); unsigned argIndex = FirstIRArg; + unsigned unpaddedIndex = 0; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; auto eltAddr = Builder.CreateStructGEP(alloca, i); - auto elt = Fn->getArg(argIndex++); + llvm::Value *elt = Fn->getArg(argIndex++); + + auto paramType = unpaddedStruct + ? 
unpaddedStruct->getElementType(unpaddedIndex++) + : unpaddedCoercionType; + + if (auto *VecTyTo = dyn_cast(eltType)) { + if (auto *VecTyFrom = dyn_cast(paramType)) { + bool Extracted; + std::tie(elt, Extracted) = CoerceScalableToFixed( + *this, VecTyTo, VecTyFrom, elt, elt->getName()); + assert(Extracted && "Unexpected scalable to fixed vector coercion"); + } + } Builder.CreateStore(elt, eltAddr); } assert(argIndex == FirstIRArg + NumIRArgs); @@ -3930,17 +3959,24 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); + auto unpaddedCoercionType = RetAI.getUnpaddedCoerceAndExpandType(); + auto *unpaddedStruct = dyn_cast(unpaddedCoercionType); // Load all of the coerced elements out into results. llvm::SmallVector results; Address addr = ReturnValue.withElementType(coercionType); + unsigned unpaddedIndex = 0; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { auto coercedEltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType)) continue; auto eltAddr = Builder.CreateStructGEP(addr, i); - auto elt = Builder.CreateLoad(eltAddr); + llvm::Value *elt = CreateCoercedLoad( + eltAddr, + unpaddedStruct ? 
unpaddedStruct->getElementType(unpaddedIndex++) + : unpaddedCoercionType, + *this); results.push_back(elt); } @@ -5472,6 +5508,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, case ABIArgInfo::CoerceAndExpand: { auto coercionType = ArgInfo.getCoerceAndExpandType(); auto layout = CGM.getDataLayout().getStructLayout(coercionType); + auto unpaddedCoercionType = ArgInfo.getUnpaddedCoerceAndExpandType(); + auto *unpaddedStruct = dyn_cast(unpaddedCoercionType); llvm::Value *tempSize = nullptr; Address addr = Address::invalid(); @@ -5502,11 +5540,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, addr = addr.withElementType(coercionType); unsigned IRArgPos = FirstIRArg; + unsigned unpaddedIndex = 0; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; Address eltAddr = Builder.CreateStructGEP(addr, i); - llvm::Value *elt = Builder.CreateLoad(eltAddr); + llvm::Value *elt = CreateCoercedLoad( + eltAddr, + unpaddedStruct ? 
unpaddedStruct->getElementType(unpaddedIndex++) + : unpaddedCoercionType, + *this); if (ArgHasMaybeUndefAttr) elt = Builder.CreateFreeze(elt); IRCallArgs[IRArgPos++] = elt; diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index ec617eec67192cc..a80411971b60c39 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -34,10 +34,17 @@ class AArch64ABIInfo : public ABIInfo { AArch64ABIKind getABIKind() const { return Kind; } bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; } - ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const; - ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic, - unsigned CallingConvention) const; - ABIArgInfo coerceIllegalVector(QualType Ty) const; + ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadicFn) const; + ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadicFn, + bool IsNamedArg, unsigned CallingConvention, + unsigned &NSRN, unsigned &NPRN) const; + llvm::Type *convertFixedToScalableVectorType(const VectorType *VT) const; + ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN, + unsigned &NPRN) const; + ABIArgInfo coerceAndExpandPureScalableAggregate( + QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred, + const SmallVectorImpl &UnpaddedCoerceToSeq, unsigned &NSRN, + unsigned &NPRN) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; @@ -45,14 +52,26 @@ class AArch64ABIInfo : public ABIInfo { bool isIllegalVectorType(QualType Ty) const; + bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP, + SmallVectorImpl &CoerceToSeq) const; + + void flattenType(llvm::Type *Ty, + SmallVectorImpl &Flattened) const; + void computeInfo(CGFunctionInfo &FI) const override { if (!::classifyReturnType(getCXXABI(), FI, *this)) FI.getReturnInfo() = 
classifyReturnType(FI.getReturnType(), FI.isVariadic()); - for (auto &it : FI.arguments()) - it.info = classifyArgumentType(it.type, FI.isVariadic(), - FI.getCallingConvention()); + unsigned ArgNo = 0; + unsigned NSRN = 0, NPRN = 0; + for (auto &it : FI.arguments()) { + const bool IsNamedArg = + !FI.isVariadic() || ArgNo < FI.getRequiredArgs().getNumRequiredArgs(); + ++ArgNo; + it.info = classifyArgumentType(it.type, FI.isVariadic(), IsNamedArg, + FI.getCallingConvention(), NSRN, NPRN); + } } RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF, @@ -201,65 +220,83 @@ void WindowsAArch64TargetCodeGenInfo::setTargetAttributes( } } -ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { - assert(Ty->isVectorType() && "expected vector type!"); +llvm::Type * +AArch64ABIInfo::convertFixedToScalableVectorType(const VectorType *VT) const { + assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); - const auto *VT = Ty->castAs(); if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) { - assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); assert(VT->getElementType()->castAs()->getKind() == BuiltinType::UChar && "unexpected builtin type for SVE predicate!"); - return ABIArgInfo::getDirect(llvm::ScalableVectorType::get( - llvm::Type::getInt1Ty(getVMContext()), 16)); + return llvm::ScalableVectorType::get(llvm::Type::getInt1Ty(getVMContext()), + 16); } if (VT->getVectorKind() == VectorKind::SveFixedLengthData) { - assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); - const auto *BT = VT->getElementType()->castAs(); - llvm::ScalableVectorType *ResType = nullptr; switch (BT->getKind()) { default: llvm_unreachable("unexpected builtin type for SVE vector!"); + case BuiltinType::SChar: case BuiltinType::UChar: - ResType = llvm::ScalableVectorType::get( + return llvm::ScalableVectorType::get( llvm::Type::getInt8Ty(getVMContext()), 16); - break; + case BuiltinType::Short: 
case BuiltinType::UShort: - ResType = llvm::ScalableVectorType::get( + return llvm::ScalableVectorType::get( llvm::Type::getInt16Ty(getVMContext()), 8); - break; + case BuiltinType::Int: case BuiltinType::UInt: - ResType = llvm::ScalableVectorType::get( + return llvm::ScalableVectorType::get( llvm::Type::getInt32Ty(getVMContext()), 4); - break; + case BuiltinType::Long: case BuiltinType::ULong: - ResType = llvm::ScalableVectorType::get( + return llvm::ScalableVectorType::get( llvm::Type::getInt64Ty(getVMContext()), 2); - break; + case BuiltinType::Half: - ResType = llvm::ScalableVectorType::get( + return llvm::ScalableVectorType::get( llvm::Type::getHalfTy(getVMContext()), 8); - break; + case BuiltinType::Float: - ResType = llvm::ScalableVectorType::get( + return llvm::ScalableVectorType::get( llvm::Type::getFloatTy(getVMContext()), 4); - break; + case BuiltinType::Double: - ResType = llvm::ScalableVectorType::get( + return llvm::ScalableVectorType::get( llvm::Type::getDoubleTy(getVMContext()), 2); - break; + case BuiltinType::BFloat16: - ResType = llvm::ScalableVectorType::get( + return llvm::ScalableVectorType::get( llvm::Type::getBFloatTy(getVMContext()), 8); - break; } - return ABIArgInfo::getDirect(ResType); + } + + llvm_unreachable("expected fixed-length SVE vector"); +} + +ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN, + unsigned &NPRN) const { + assert(Ty->isVectorType() && "expected vector type!"); + + const auto *VT = Ty->castAs(); + if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) { + assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); + assert(VT->getElementType()->castAs()->getKind() == + BuiltinType::UChar && + "unexpected builtin type for SVE predicate!"); + NPRN = std::min(NPRN + 1, 4u); + return ABIArgInfo::getDirect(llvm::ScalableVectorType::get( + llvm::Type::getInt1Ty(getVMContext()), 16)); + } + + if (VT->getVectorKind() == VectorKind::SveFixedLengthData) { + NSRN = 
std::min(NSRN + 1, 8u); + return ABIArgInfo::getDirect(convertFixedToScalableVectorType(VT)); } uint64_t Size = getContext().getTypeSize(Ty); @@ -273,26 +310,54 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { return ABIArgInfo::getDirect(ResType); } if (Size == 64) { + NSRN = std::min(NSRN + 1, 8u); auto *ResType = llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); return ABIArgInfo::getDirect(ResType); } if (Size == 128) { + NSRN = std::min(NSRN + 1, 8u); auto *ResType = llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); return ABIArgInfo::getDirect(ResType); } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } -ABIArgInfo -AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, - unsigned CallingConvention) const { +ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate( + QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred, + const SmallVectorImpl &UnpaddedCoerceToSeq, unsigned &NSRN, + unsigned &NPRN) const { + if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4) + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + NSRN += NVec; + NPRN += NPred; + + llvm::Type *UnpaddedCoerceToType = + UnpaddedCoerceToSeq.size() == 1 + ? UnpaddedCoerceToSeq[0] + : llvm::StructType::get(CGT.getLLVMContext(), UnpaddedCoerceToSeq, + true); + + SmallVector CoerceToSeq; + flattenType(CGT.ConvertType(Ty), CoerceToSeq); + auto *CoerceToType = + llvm::StructType::get(CGT.getLLVMContext(), CoerceToSeq, false); + + return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType); +} + +ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn, + bool IsNamedArg, + unsigned CallingConvention, + unsigned &NSRN, + unsigned &NPRN) const { Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. 
if (isIllegalVectorType(Ty)) - return coerceIllegalVector(Ty); + return coerceIllegalVector(Ty, NSRN, NPRN); if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. @@ -303,6 +368,36 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, if (EIT->getNumBits() > 128) return getNaturalAlignIndirect(Ty, false); + if (Ty->isVectorType()) + NSRN = std::min(NSRN + 1, 8u); + else if (const auto *BT = Ty->getAs()) { + if (BT->isFloatingPoint()) + NSRN = std::min(NSRN + 1, 8u); + else { + switch (BT->getKind()) { + case BuiltinType::MFloat8x8: + case BuiltinType::MFloat8x16: + NSRN = std::min(NSRN + 1, 8u); + break; + case BuiltinType::SveBool: + case BuiltinType::SveCount: + NPRN = std::min(NPRN + 1, 4u); + break; + case BuiltinType::SveBoolx2: + NPRN = std::min(NPRN + 2, 4u); + break; + case BuiltinType::SveBoolx4: + NPRN = std::min(NPRN + 4, 4u); + break; + default: + if (BT->isSVESizelessBuiltinType()) + NSRN = std::min( + NSRN + getContext().getBuiltinVectorTypeInfo(BT).NumVectors, + 8u); + } + } + } + return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS() ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty)) : ABIArgInfo::getDirect()); @@ -335,10 +430,11 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, uint64_t Members = 0; bool IsWin64 = Kind == AArch64ABIKind::Win64 || CallingConvention == llvm::CallingConv::Win64; - bool IsWinVariadic = IsWin64 && IsVariadic; + bool IsWinVariadic = IsWin64 && IsVariadicFn; // In variadic functions on Windows, all composite types are treated alike, // no special handling of HFAs/HVAs. 
if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) { + NSRN = std::min(NSRN + Members, uint64_t(8)); if (Kind != AArch64ABIKind::AAPCS) return ABIArgInfo::getDirect( llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members)); @@ -353,6 +449,17 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, nullptr, true, Align); } + // In AAPCS named arguments of a Pure Scalable Type are passed expanded in + // registers, or indirectly if there are not enough registers. + if (Kind == AArch64ABIKind::AAPCS) { + unsigned NVec = 0, NPred = 0; + SmallVector UnpaddedCoerceToSeq; + if (passAsPureScalableType(Ty, NVec, NPred, UnpaddedCoerceToSeq) && + (NVec + NPred) > 0) + return coerceAndExpandPureScalableAggregate( + Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN); + } + // Aggregates <= 16 bytes are passed directly in registers or on the stack. if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of @@ -383,14 +490,16 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic, } ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, - bool IsVariadic) const { + bool IsVariadicFn) const { if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (const auto *VT = RetTy->getAs()) { if (VT->getVectorKind() == VectorKind::SveFixedLengthData || - VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) - return coerceIllegalVector(RetTy); + VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) { + unsigned NSRN = 0, NPRN = 0; + return coerceIllegalVector(RetTy, NSRN, NPRN); + } } // Large vector types should be returned via memory. @@ -419,10 +528,24 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members) && !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 && - IsVariadic)) + IsVariadicFn)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. 
return ABIArgInfo::getDirect(); + // In AAPCS return values of a Pure Scalable type are treated as a single + // named argument and passed expanded in registers, or indirectly if there are + // not enough registers. + if (Kind == AArch64ABIKind::AAPCS) { + unsigned NSRN = 0, NPRN = 0; + unsigned NVec = 0, NPred = 0; + SmallVector UnpaddedCoerceToSeq; + if (passAsPureScalableType(RetTy, NVec, NPred, UnpaddedCoerceToSeq) && + (NVec + NPred) > 0) + return coerceAndExpandPureScalableAggregate( + RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN, + NPRN); + } + // Aggregates <= 16 bytes are returned directly in registers or on the stack. if (Size <= 128) { // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of @@ -508,9 +631,15 @@ bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // but with the difference that any floating-point type is allowed, // including __fp16. if (const BuiltinType *BT = Ty->getAs()) { - if (BT->isFloatingPoint()) + if (BT->isFloatingPoint() || BT->getKind() == BuiltinType::MFloat8x16 || + BT->getKind() == BuiltinType::MFloat8x8) return true; } else if (const VectorType *VT = Ty->getAs()) { + if (auto Kind = VT->getVectorKind(); + Kind == VectorKind::SveFixedLengthData || + Kind == VectorKind::SveFixedLengthPredicate) + return false; + unsigned VecSize = getContext().getTypeSize(VT); if (VecSize == 64 || VecSize == 128) return true; @@ -533,11 +662,166 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() return true; } +// Check if a type needs to be passed in registers as a Pure Scalable Type (as +// defined by AAPCS64). Return the number of data vectors and the number of +// predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon +// return `CoerceToSeq` contains an expanded sequence of LLVM IR types, one +// element for each non-composite member. 
For practical purposes, limit the +// length of `CoerceToSeq` to about 12 (the maximum that could possibly fit +// in registers) and return false, the effect of which will be to pass the +// argument under the rules for a large (> 128 bytes) composite. +bool AArch64ABIInfo::passAsPureScalableType( + QualType Ty, unsigned &NVec, unsigned &NPred, + SmallVectorImpl &CoerceToSeq) const { + if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { + uint64_t NElt = AT->getZExtSize(); + if (NElt == 0) + return false; + + unsigned NV = 0, NP = 0; + SmallVector EltCoerceToSeq; + if (!passAsPureScalableType(AT->getElementType(), NV, NP, EltCoerceToSeq)) + return false; + + if (CoerceToSeq.size() + NElt * EltCoerceToSeq.size() > 12) + return false; + + for (uint64_t I = 0; I < NElt; ++I) + llvm::copy(EltCoerceToSeq, std::back_inserter(CoerceToSeq)); + + NVec += NElt * NV; + NPred += NElt * NP; + return true; + } + + if (const RecordType *RT = Ty->getAs()) { + // If the record cannot be passed in registers, then it's not a PST. + if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()); + RAA != CGCXXABI::RAA_Default) + return false; + + // Pure scalable types are never unions and never contain unions. + const RecordDecl *RD = RT->getDecl(); + if (RD->isUnion()) + return false; + + // If this is a C++ record, check the bases. + if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { + for (const auto &I : CXXRD->bases()) { + if (isEmptyRecord(getContext(), I.getType(), true)) + continue; + if (!passAsPureScalableType(I.getType(), NVec, NPred, CoerceToSeq)) + return false; + } + } + + // Check members. 
+ for (const auto *FD : RD->fields()) { + QualType FT = FD->getType(); + if (isEmptyField(getContext(), FD, /* AllowArrays */ true)) + continue; + if (!passAsPureScalableType(FT, NVec, NPred, CoerceToSeq)) + return false; + } + + return true; + } + + const auto *VT = Ty->getAs(); + if (!VT) + return false; + + if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) { + ++NPred; + if (CoerceToSeq.size() + 1 > 12) + return false; + CoerceToSeq.push_back(convertFixedToScalableVectorType(VT)); + return true; + } + + if (VT->getVectorKind() == VectorKind::SveFixedLengthData) { + ++NVec; + if (CoerceToSeq.size() + 1 > 12) + return false; + CoerceToSeq.push_back(convertFixedToScalableVectorType(VT)); + return true; + } + + if (!VT->isBuiltinType()) + return false; + + switch (cast(VT)->getKind()) { +#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \ + case BuiltinType::Id: \ + ++NVec; \ + break; +#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \ + case BuiltinType::Id: \ + ++NPred; \ + break; +#define SVE_TYPE(Name, Id, SingletonId) +#include "clang/Basic/AArch64SVEACLETypes.def" + default: + return false; + } + + ASTContext::BuiltinVectorTypeInfo Info = + getContext().getBuiltinVectorTypeInfo(cast(Ty)); + assert(Info.NumVectors > 0 && Info.NumVectors <= 4 && + "Expected 1, 2, 3 or 4 vectors!"); + auto VTy = llvm::ScalableVectorType::get(CGT.ConvertType(Info.ElementType), + Info.EC.getKnownMinValue()); + + if (CoerceToSeq.size() + Info.NumVectors > 12) + return false; + std::fill_n(std::back_inserter(CoerceToSeq), Info.NumVectors, VTy); + + return true; +} + +// Expand an LLVM IR type into a sequence with a element for each non-struct, +// non-array member of the type, with the exception of the padding types, which +// are retained. 
+void AArch64ABIInfo::flattenType( + llvm::Type *Ty, SmallVectorImpl &Flattened) const { + + if (ABIArgInfo::isPaddingForCoerceAndExpand(Ty)) { + Flattened.push_back(Ty); + return; + } + + if (const auto *AT = dyn_cast(Ty)) { + uint64_t NElt = AT->getNumElements(); + if (NElt == 0) + return; + + SmallVector EltFlattened; + flattenType(AT->getElementType(), EltFlattened); + + for (uint64_t I = 0; I < NElt; ++I) + llvm::copy(EltFlattened, std::back_inserter(Flattened)); + return; + } + + if (const auto *ST = dyn_cast(Ty)) { + for (auto *ET : ST->elements()) + flattenType(ET, Flattened); + return; + } + + Flattened.push_back(Ty); +} + RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF, AArch64ABIKind Kind, AggValueSlot Slot) const { - ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true, - CGF.CurFnInfo->getCallingConvention()); + // These numbers are not used for variadic arguments, hence it doesn't matter + // they don't retain their values across multiple calls to + // `classifyArgumentType` here. + unsigned NSRN = 0, NPRN = 0; + ABIArgInfo AI = + classifyArgumentType(Ty, /*IsVariadicFn=*/true, /* IsNamedArg */ false, + CGF.CurFnInfo->getCallingConvention(), NSRN, NPRN); // Empty records are ignored for parameter passing purposes. 
if (AI.isIgnore()) return Slot.asRValue(); diff --git a/clang/test/CodeGen/aarch64-pure-scalable-args-empty-union.c b/clang/test/CodeGen/aarch64-pure-scalable-args-empty-union.c new file mode 100644 index 000000000000000..546910068c78a22 --- /dev/null +++ b/clang/test/CodeGen/aarch64-pure-scalable-args-empty-union.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -O3 -triple aarch64 -target-feature +sve -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-C +// RUN: %clang_cc1 -x c++ -O3 -triple aarch64 -target-feature +sve -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-CXX + +typedef __SVFloat32_t fvec32 __attribute__((arm_sve_vector_bits(128))); + +// PST containing an empty union: when compiled as C pass it in registers, +// when compiled as C++ - in memory. +typedef struct { + fvec32 x[4]; + union {} u; +} S0; + +#ifdef __cplusplus +extern "C" +#endif +void use0(S0); + +void f0(S0 *p) { + use0(*p); +} +// CHECK-C: declare void @use0(, , , ) +// CHECK-CXX: declare void @use0(ptr noundef) + +#ifdef __cplusplus + +// PST containing an empty union with `[[no_unique_address]]`` - pass in registers. 
+typedef struct { + fvec32 x[4]; + [[no_unique_address]] + union {} u; +} S1; + +extern "C" void use1(S1); +void f1(S1 *p) { + use1(*p); +} +// CHECK-CXX: declare void @use1(, , , ) + +#endif // __cplusplus diff --git a/clang/test/CodeGen/aarch64-pure-scalable-args.c b/clang/test/CodeGen/aarch64-pure-scalable-args.c new file mode 100644 index 000000000000000..851159ada767495 --- /dev/null +++ b/clang/test/CodeGen/aarch64-pure-scalable-args.c @@ -0,0 +1,461 @@ +// RUN: %clang_cc1 -O3 -triple aarch64 -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS +// RUN: %clang_cc1 -O3 -triple arm64-apple-ios7.0 -target-abi darwinpcs -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DARWIN +// RUN: %clang_cc1 -O3 -triple aarch64-linux-gnu -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS + +// REQUIRES: aarch64-registered-target + +#include +#include +#include + +typedef svfloat32_t fvec32 __attribute__((arm_sve_vector_bits(128))); +typedef svfloat64_t fvec64 __attribute__((arm_sve_vector_bits(128))); +typedef svbool_t bvec __attribute__((arm_sve_vector_bits(128))); +typedef svmfloat8_t mfvec8 __attribute__((arm_sve_vector_bits(128))); + +typedef struct { + float f[4]; +} HFA; + +typedef struct { + mfloat8x16_t f[4]; +} HVA; + +// Pure Scalable Type, needs 4 Z-regs, 2 P-regs +typedef struct { + bvec a; + fvec64 x; + fvec32 y[2]; + mfvec8 z; + bvec b; +} PST; + +// Pure Scalable Type, 1 Z-reg +typedef struct { + fvec32 x; +} SmallPST; + +// Big PST, does not fit in registers. 
+typedef struct { + struct { + bvec a; + fvec32 x[4]; + } u[2]; + fvec64 v; +} BigPST; + +// A small aggregate type +typedef struct { + char data[16]; +} SmallAgg; + +// CHECK: %struct.PST = type { <2 x i8>, <2 x double>, [2 x <4 x float>], <16 x i8>, <2 x i8> } + +// Test argument passing of Pure Scalable Types by examining the generated +// LLVM IR function declarations. A PST argument in C/C++ should map to: +// a) an `ptr` argument, if passed indirectly through memory +// b) a series of scalable vector arguments, if passed via registers + +// Simple argument passing, PST expanded into registers. +// a -> p0 +// b -> p1 +// x -> q0 +// y[0] -> q1 +// y[1] -> q2 +// z -> q3 +void test_argpass_simple(PST *p) { + void argpass_simple_callee(PST); + argpass_simple_callee(*p); +} +// CHECK-AAPCS: define dso_local void @test_argpass_simple(ptr nocapture noundef readonly %p) +// CHECK-AAPCS-NEXT: entry: +// CHECK-AAPCS-NEXT: %0 = load <2 x i8>, ptr %p, align 16 +// CHECK-AAPCS-NEXT: %cast.scalable = tail call @llvm.vector.insert.nxv2i8.v2i8( undef, <2 x i8> %0, i64 0) +// CHECK-AAPCS-NEXT: %1 = bitcast %cast.scalable to +// CHECK-AAPCS-NEXT: %2 = getelementptr inbounds nuw i8, ptr %p, i64 16 +// CHECK-AAPCS-NEXT: %3 = load <2 x double>, ptr %2, align 16 +// CHECK-AAPCS-NEXT: %cast.scalable1 = tail call @llvm.vector.insert.nxv2f64.v2f64( undef, <2 x double> %3, i64 0) +// CHECK-AAPCS-NEXT: %4 = getelementptr inbounds nuw i8, ptr %p, i64 32 +// CHECK-AAPCS-NEXT: %5 = load <4 x float>, ptr %4, align 16 +// CHECK-AAPCS-NEXT: %cast.scalable2 = tail call @llvm.vector.insert.nxv4f32.v4f32( undef, <4 x float> %5, i64 0) +// CHECK-AAPCS-NEXT: %6 = getelementptr inbounds nuw i8, ptr %p, i64 48 +// CHECK-AAPCS-NEXT: %7 = load <4 x float>, ptr %6, align 16 +// CHECK-AAPCS-NEXT: %cast.scalable3 = tail call @llvm.vector.insert.nxv4f32.v4f32( undef, <4 x float> %7, i64 0) +// CHECK-AAPCS-NEXT: %8 = getelementptr inbounds nuw i8, ptr %p, i64 64 +// CHECK-AAPCS-NEXT: %9 = load <16 x 
i8>, ptr %8, align 16 +// CHECK-AAPCS-NEXT: %cast.scalable4 = tail call @llvm.vector.insert.nxv16i8.v16i8( undef, <16 x i8> %9, i64 0) +// CHECK-AAPCS-NEXT: %10 = getelementptr inbounds nuw i8, ptr %p, i64 80 +// CHECK-AAPCS-NEXT: %11 = load <2 x i8>, ptr %10, align 16 +// CHECK-AAPCS-NEXT: %cast.scalable5 = tail call @llvm.vector.insert.nxv2i8.v2i8( undef, <2 x i8> %11, i64 0) +// CHECK-AAPCS-NEXT: %12 = bitcast %cast.scalable5 to +// CHECK-AAPCS-NEXT: tail call void @argpass_simple_callee( %1, %cast.scalable1, %cast.scalable2, %cast.scalable3, %cast.scalable4, %12) +// CHECK-AAPCS-NEXT: ret void + +// CHECK-AAPCS: declare void @argpass_simple_callee(, , , , , ) +// CHECK-DARWIN: declare void @argpass_simple_callee(ptr noundef) + +// Boundary case of using the last available Z-reg, PST expanded. +// 0.0 -> d0-d3 +// a -> p0 +// b -> p1 +// x -> q4 +// y[0] -> q5 +// y[1] -> q6 +// z -> q7 +void test_argpass_last_z(PST *p) { + void argpass_last_z_callee(double, double, double, double, PST); + argpass_last_z_callee(.0, .0, .0, .0, *p); +} +// CHECK-AAPCS: declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, , , , , , ) +// CHECK-DARWIN: declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, ptr noundef) + + +// Like the above, but using a tuple type to occupy some registers. +// x -> z0.d-z3.d +// a -> p0 +// b -> p1 +// x -> q4 +// y[0] -> q5 +// y[1] -> q6 +// z -> q7 +void test_argpass_last_z_tuple(PST *p, svfloat64x4_t x) { + void argpass_last_z_tuple_callee(svfloat64x4_t, PST); + argpass_last_z_tuple_callee(x, *p); +} +// CHECK-AAPCS: declare void @argpass_last_z_tuple_callee(, , , , , , , , , ) +// CHECK-DARWIN: declare void @argpass_last_z_tuple_callee(, , , , ptr noundef) + + +// Boundary case of using the last available P-reg, PST expanded. 
+// false -> p0-p1 +// a -> p2 +// b -> p3 +// x -> q0 +// y[0] -> q1 +// y[1] -> q2 +// z -> q3 +void test_argpass_last_p(PST *p) { + void argpass_last_p_callee(svbool_t, svcount_t, PST); + argpass_last_p_callee(svpfalse(), svpfalse_c(), *p); +} +// CHECK-AAPCS: declare void @argpass_last_p_callee(, target("aarch64.svcount"), , , , , , ) +// CHECK-DARWIN: declare void @argpass_last_p_callee(, target("aarch64.svcount"), ptr noundef) + + +// Not enough Z-regs, push PST to memory and pass a pointer, Z-regs and +// P-regs still available for other arguments +// u -> z0 +// v -> q1 +// w -> q2 +// 0.0 -> d3-d4 +// 1 -> w0 +// *p -> memory, address -> x1 +// 2 -> w2 +// 3.0 -> d5 +// true -> p0 +void test_argpass_no_z(PST *p, double dummy, svmfloat8_t u, int8x16_t v, mfloat8x16_t w) { + void argpass_no_z_callee(svmfloat8_t, int8x16_t, mfloat8x16_t, double, double, int, PST, int, double, svbool_t); + argpass_no_z_callee(u, v, w, .0, .0, 1, *p, 2, 3.0, svptrue_b64()); +} +// CHECK: declare void @argpass_no_z_callee(, <16 x i8> noundef, <16 x i8>, double noundef, double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, ) + + +// Like the above, using a tuple to occupy some registers. +// x -> z0.d-z3.d +// 0.0 -> d4 +// 1 -> w0 +// *p -> memory, address -> x1 +// 2 -> w2 +// 3.0 -> d5 +// true -> p0 +void test_argpass_no_z_tuple_f64(PST *p, float dummy, svfloat64x4_t x) { + void argpass_no_z_tuple_f64_callee(svfloat64x4_t, double, int, PST, int, + double, svbool_t); + argpass_no_z_tuple_f64_callee(x, .0, 1, *p, 2, 3.0, svptrue_b64()); +} +// CHECK: declare void @argpass_no_z_tuple_f64_callee(, , , , double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, ) + + +// Likewise, using a different tuple. 
+// x -> z0.d-z3.d +// 0.0 -> d4 +// 1 -> w0 +// *p -> memory, address -> x1 +// 2 -> w2 +// 3.0 -> d5 +// true -> p0 +void test_argpass_no_z_tuple_mfp8(PST *p, float dummy, svmfloat8x4_t x) { + void argpass_no_z_tuple_mfp8_callee(svmfloat8x4_t, double, int, PST, int, + double, svbool_t); + argpass_no_z_tuple_mfp8_callee(x, .0, 1, *p, 2, 3.0, svptrue_b64()); +} +// CHECK: declare void @argpass_no_z_tuple_mfp8_callee(, , , , double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, ) + + +// Not enough Z-regs (consumed by a HFA), PST passed indirectly +// 0.0 -> d0 +// *h -> s1-s4 +// 1 -> w0 +// *p -> memory, address -> x1 +// p -> x1 +// 2 -> w2 +// true -> p0 +void test_argpass_no_z_hfa(HFA *h, PST *p) { + void argpass_no_z_hfa_callee(double, HFA, int, PST, int, svbool_t); + argpass_no_z_hfa_callee(.0, *h, 1, *p, 2, svptrue_b64()); +} +// CHECK-AAPCS: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float] alignstack(8), i32 noundef, ptr noundef, i32 noundef, ) +// CHECK-DARWIN: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float], i32 noundef, ptr noundef, i32 noundef, ) + +// Not enough Z-regs (consumed by a HVA), PST passed indirectly +// 0.0 -> d0 +// *h -> s1-s4 +// 1 -> w0 +// *p -> memory, address -> x1 +// p -> x1 +// 2 -> w2 +// true -> p0 +void test_argpass_no_z_hva(HVA *h, PST *p) { + void argpass_no_z_hva_callee(double, HVA, int, PST, int, svbool_t); + argpass_no_z_hva_callee(.0, *h, 1, *p, 2, svptrue_b64()); +} +// CHECK-AAPCS: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>] alignstack(16), i32 noundef, ptr noundef, i32 noundef, ) +// CHECK-DARWIN: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>], i32 noundef, ptr noundef, i32 noundef, ) + +// Not enough P-regs, PST passed indirectly, Z-regs and P-regs still available. 
+// true -> p0-p2 +// 1 -> w0 +// *p -> memory, address -> x1 +// 2 -> w2 +// 3.0 -> d0 +// true -> p3 +void test_argpass_no_p(PST *p) { + void argpass_no_p_callee(svbool_t, svbool_t, svbool_t, int, PST, int, double, svbool_t); + argpass_no_p_callee(svptrue_b8(), svptrue_b16(), svptrue_b32(), 1, *p, 2, 3.0, svptrue_b64()); +} +// CHECK: declare void @argpass_no_p_callee(, , , i32 noundef, ptr noundef, i32 noundef, double noundef, ) + + +// Like above, using a tuple to occupy some registers. +// P-regs still available. +// v -> p0-p1 +// u -> p2 +// 1 -> w0 +// *p -> memory, address -> x1 +// 2 -> w2 +// 3.0 -> d0 +// true -> p3 +void test_argpass_no_p_tuple(PST *p, svbool_t u, svboolx2_t v) { + void argpass_no_p_tuple_callee(svboolx2_t, svbool_t, int, PST, int, double, + svbool_t); + argpass_no_p_tuple_callee(v, u, 1, *p, 2, 3.0, svptrue_b64()); +} +// CHECK: declare void @argpass_no_p_tuple_callee(, , , i32 noundef, ptr noundef, i32 noundef, double noundef, ) + + +// HFAs go back-to-back to memory, afterwards Z-regs not available, PST passed indirectly. +// 0.0 -> d0-d3 +// *h -> memory +// *p -> memory, address -> x0 +// *h -> memory +// false -> p0 +void test_after_hfa(HFA *h, PST *p) { + void after_hfa_callee(double, double, double, double, double, HFA, PST, HFA, svbool_t); + after_hfa_callee(.0, .0, .0, .0, .0, *h, *p, *h, svpfalse()); +} +// CHECK-AAPCS: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float] alignstack(8), ptr noundef, [4 x float] alignstack(8), ) +// CHECK-DARWIN: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float], ptr noundef, [4 x float], ) + +// Small PST, not enough registers, passed indirectly, unlike other small +// aggregates. 
+// *s -> x0-x1 +// 0.0 -> d0-d7 +// *p -> memory, address -> x2 +// 1.0 -> memory +// 2.0 -> memory (next to the above) +void test_small_pst(SmallPST *p, SmallAgg *s) { + void small_pst_callee(SmallAgg, double, double, double, double, double, double, double, double, double, SmallPST, double); + small_pst_callee(*s, .0, .0, .0, .0, .0, .0, .0, .0, 1.0, *p, 2.0); +} +// CHECK-AAPCS: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, ptr noundef, double noundef) +// CHECK-DARWIN: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, i128, double noundef) + + +// Simple return, PST expanded to registers +// p->a -> p0 +// p->x -> q0 +// p->y[0] -> q1 +// p->y[1] -> q2 +// p->z -> q3 +// p->b -> p1 +PST test_return(PST *p) { + return *p; +} +// CHECK-AAPCS: define dso_local <{ , , , , , }> @test_return(ptr +// CHECK-DARWIN: define void @test_return(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.PST) align 16 %agg.result, ptr nocapture noundef readonly %p) + +// Corner case of 1-element aggregate +// p->x -> q0 +SmallPST test_return_small_pst(SmallPST *p) { + return *p; +} +// CHECK-AAPCS: define dso_local @test_return_small_pst(ptr +// CHECK-DARWIN: define i128 @test_return_small_pst(ptr nocapture noundef readonly %p) + + +// Big PST, returned indirectly +// *p -> *x8 +BigPST test_return_big_pst(BigPST *p) { + return *p; +} +// CHECK-AAPCS: define dso_local void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 %agg.result, ptr nocapture noundef readonly %p) +// CHECK-DARWIN: define void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 %agg.result, ptr nocapture noundef 
readonly %p) + +// Variadic arguments are unnamed, PST passed indirectly. +// (Passing SVE types to a variadic function currently unsupported by +// the AArch64 backend) +// p->a -> p0 +// p->x -> q0 +// p->y[0] -> q1 +// p->y[1] -> q2 +// p->z -> q3 +// p->b -> p1 +// *q -> memory, address -> x1 +void test_pass_variadic(PST *p, PST *q) { + void pass_variadic_callee(PST, ...); + pass_variadic_callee(*p, *q); +} +// CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false) +// CHECK-AAPCS: call void (, , , , , , ...) @pass_variadic_callee( %1, %cast.scalable1, %cast.scalable2, %cast.scalable3, %cast.scalable4, %12, ptr noundef nonnull %byval-temp) + +// CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %p, i64 96, i1 false) +// CHECK-DARWIN: call void @llvm.lifetime.start.p0(i64 96, ptr nonnull %byval-temp1) +// CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp1, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false) +// CHECK-DARWIN: call void (ptr, ...) @pass_variadic_callee(ptr noundef nonnull %byval-temp, ptr noundef nonnull %byval-temp1) + + +// Test passing a small PST, still passed indirectly, despite being <= 128 bits +void test_small_pst_variadic(SmallPST *p) { + void small_pst_variadic_callee(int, ...); + small_pst_variadic_callee(0, *p); +} +// CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) %byval-temp, ptr noundef nonnull align 16 dereferenceable(16) %p, i64 16, i1 false) +// CHECK-AAPCS: call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, ptr noundef nonnull %byval-temp) + +// CHECK-DARWIN: %0 = load i128, ptr %p, align 16 +// CHECK-DARWIN: tail call void (i32, ...) 
@small_pst_variadic_callee(i32 noundef 0, i128 %0) + +// Test handling of a PST argument when passed in registers, from the callee side. +void test_argpass_callee_side(PST v) { + void use(PST *p); + use(&v); +} +// CHECK-AAPCS: define dso_local void @test_argpass_callee_side( %0, %.coerce1, %.coerce3, %.coerce5, %.coerce7, %1) +// CHECK-AAPCS-NEXT: entry: +// CHECK-AAPCS-NEXT: %v = alloca %struct.PST, align 16 +// CHECK-AAPCS-NEXT: %.coerce = bitcast %0 to +// CHECK-AAPCS-NEXT: %cast.fixed = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %.coerce, i64 0) +// CHECK-AAPCS-NEXT: store <2 x i8> %cast.fixed, ptr %v, align 16 +// CHECK-AAPCS-NEXT: %2 = getelementptr inbounds nuw i8, ptr %v, i64 16 +// CHECK-AAPCS-NEXT: %cast.fixed2 = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64( %.coerce1, i64 0) +// CHECK-AAPCS-NEXT: store <2 x double> %cast.fixed2, ptr %2, align 16 +// CHECK-AAPCS-NEXT: %3 = getelementptr inbounds nuw i8, ptr %v, i64 32 +// CHECK-AAPCS-NEXT: %cast.fixed4 = tail call <4 x float> @llvm.vector.extract.v4f32.nxv4f32( %.coerce3, i64 0) +// CHECK-AAPCS-NEXT: store <4 x float> %cast.fixed4, ptr %3, align 16 +// CHECK-AAPCS-NEXT: %4 = getelementptr inbounds nuw i8, ptr %v, i64 48 +// CHECK-AAPCS-NEXT: %cast.fixed6 = tail call <4 x float> @llvm.vector.extract.v4f32.nxv4f32( %.coerce5, i64 0) +// CHECK-AAPCS-NEXT: store <4 x float> %cast.fixed6, ptr %4, align 16 +// CHECK-AAPCS-NEXT: %5 = getelementptr inbounds nuw i8, ptr %v, i64 64 +// CHECK-AAPCS-NEXT: %cast.fixed8 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8( %.coerce7, i64 0) +// CHECK-AAPCS-NEXT: store <16 x i8> %cast.fixed8, ptr %5, align 16 +// CHECK-AAPCS-NEXT: %6 = getelementptr inbounds nuw i8, ptr %v, i64 80 +// CHECK-AAPCS-NEXT: %.coerce9 = bitcast %1 to +// CHECK-AAPCS-NEXT: %cast.fixed10 = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8( %.coerce9, i64 0) +// CHECK-AAPCS-NEXT: store <2 x i8> %cast.fixed10, ptr %6, align 16 +// CHECK-AAPCS-NEXT: call void @use(ptr 
noundef nonnull %v) +// CHECK-AAPCS-NEXT: ret void +// CHECK-AAPCS-NEXT: } + +// Test va_arg operation +#ifdef __cplusplus + extern "C" +#endif +void test_va_arg(int n, ...) { + va_list ap; + va_start(ap, n); + PST v = va_arg(ap, PST); + va_end(ap); + + void use1(bvec, fvec32); + use1(v.a, v.y[1]); +} +// CHECK-AAPCS: define dso_local void @test_va_arg(i32 noundef %n, ...) +// CHECK-AAPCS-NEXT: entry: +// CHECK-AAPCS-NEXT: %ap = alloca %struct.__va_list, align 8 +// CHECK-AAPCS-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %ap) +// CHECK-AAPCS-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap) +// CHECK-AAPCS-NEXT: %gr_offs_p = getelementptr inbounds nuw i8, ptr %ap, i64 24 +// CHECK-AAPCS-NEXT: %gr_offs = load i32, ptr %gr_offs_p, align 8 +// CHECK-AAPCS-NEXT: %0 = icmp sgt i32 %gr_offs, -1 +// CHECK-AAPCS-NEXT: br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg +// CHECK-AAPCS-EMPTY: +// CHECK-AAPCS-NEXT: vaarg.maybe_reg: ; preds = %entry + +// Increment by 8, size of the pointer to the argument value, not size of the argument value itself. 
+ +// CHECK-AAPCS-NEXT: %new_reg_offs = add nsw i32 %gr_offs, 8 +// CHECK-AAPCS-NEXT: store i32 %new_reg_offs, ptr %gr_offs_p, align 8 +// CHECK-AAPCS-NEXT: %inreg = icmp ult i32 %gr_offs, -7 +// CHECK-AAPCS-NEXT: br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack +// CHECK-AAPCS-EMPTY: +// CHECK-AAPCS-NEXT: vaarg.in_reg: ; preds = %vaarg.maybe_reg +// CHECK-AAPCS-NEXT: %reg_top_p = getelementptr inbounds nuw i8, ptr %ap, i64 8 +// CHECK-AAPCS-NEXT: %reg_top = load ptr, ptr %reg_top_p, align 8 +// CHECK-AAPCS-NEXT: %1 = sext i32 %gr_offs to i64 +// CHECK-AAPCS-NEXT: %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1 +// CHECK-AAPCS-NEXT: br label %vaarg.end +// CHECK-AAPCS-EMPTY: +// CHECK-AAPCS-NEXT: vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry +// CHECK-AAPCS-NEXT: %stack = load ptr, ptr %ap, align 8 +// CHECK-AAPCS-NEXT: %new_stack = getelementptr inbounds i8, ptr %stack, i64 8 +// CHECK-AAPCS-NEXT: store ptr %new_stack, ptr %ap, align 8 +// CHECK-AAPCS-NEXT: br label %vaarg.end +// CHECK-AAPCS-EMPTY: +// CHECK-AAPCS-NEXT: vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg +// CHECK-AAPCS-NEXT: %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ] + +// Extra indirection, for a composite passed indirectly. 
+// CHECK-AAPCS-NEXT: %vaarg.addr = load ptr, ptr %vaargs.addr, align 8 + +// CHECK-AAPCS-NEXT: %v.sroa.0.0.copyload = load <2 x i8>, ptr %vaarg.addr, align 16 +// CHECK-AAPCS-NEXT: %v.sroa.43.0.vaarg.addr.sroa_idx = getelementptr inbounds i8, ptr %vaarg.addr, i64 48 +// CHECK-AAPCS-NEXT: %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0.vaarg.addr.sroa_idx, align 16 +// CHECK-AAPCS-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap) +// CHECK-AAPCS-NEXT: %cast.scalable = call @llvm.vector.insert.nxv2i8.v2i8( undef, <2 x i8> %v.sroa.0.0.copyload, i64 0) +// CHECK-AAPCS-NEXT: %3 = bitcast %cast.scalable to +// CHECK-AAPCS-NEXT: %cast.scalable2 = call @llvm.vector.insert.nxv4f32.v4f32( undef, <4 x float> %v.sroa.43.0.copyload, i64 0) +// CHECK-AAPCS-NEXT: call void @use1( noundef %3, noundef %cast.scalable2) +// CHECK-AAPCS-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap) +// CHECK-AAPCS-NEXT: ret void +// CHECK-AAPCS-NEXT: } + +// CHECK-DARWIN: define void @test_va_arg(i32 noundef %n, ...) 
+// CHECK-DARWIN-NEXT: entry: +// CHECK-DARWIN-NEXT: %ap = alloca ptr, align 8 +// CHECK-DARWIN-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ap) +// CHECK-DARWIN-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap) +// CHECK-DARWIN-NEXT: %argp.cur = load ptr, ptr %ap, align 8 +// CHECK-DARWIN-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8 +// CHECK-DARWIN-NEXT: store ptr %argp.next, ptr %ap, align 8 +// CHECK-DARWIN-NEXT: %0 = load ptr, ptr %argp.cur, align 8 +// CHECK-DARWIN-NEXT: %v.sroa.0.0.copyload = load <2 x i8>, ptr %0, align 16 +// CHECK-DARWIN-NEXT: %v.sroa.43.0..sroa_idx = getelementptr inbounds i8, ptr %0, i64 48 +// CHECK-DARWIN-NEXT: %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0..sroa_idx, align 16 +// CHECK-DARWIN-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap) +// CHECK-DARWIN-NEXT: %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0) +// CHECK-DARWIN-NEXT: %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1> +// CHECK-DARWIN-NEXT: %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0) +// CHECK-DARWIN-NEXT: call void @use1(<vscale x 16 x i1> noundef %1, <vscale x 4 x float> noundef %cast.scalable2) +// CHECK-DARWIN-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap) +// CHECK-DARWIN-NEXT: ret void +// CHECK-DARWIN-NEXT: } From b27acebe63b128feb3bb9c3c62d77f235d2e6a6e Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Mon, 28 Oct 2024 11:57:00 -0400 Subject: [PATCH 182/425] [AMDGPU][True16][test] update VOPC/VOPCX test for true16/fake16 (#112829) This is a non-functional change update GFX11/GFX12 VOPC/VOPCX asm/dasm test for true16/fake16: 1. duplicate files to be true16/fake16 by adding "-mattr=+real-true16/-mattr=-real-true16" while true16 test file will be updated to true16 format when the true16 instructions are supported 2. sort "*t16_err.s" and "*t16_promote.s" tests to alphabetic order. 
This is for the upcoming true16 mc changes, and mainly trying to help repo maintainer to resolve conflicts in the tests quickly. A script is proposed to help for the sorting https://github.com/llvm/llvm-project/pull/111769. Since these two files are t16 only, it should not create conflicts in downstream branches 3. add `-filetype=null` to separate stdout and stderr to avoid disordered output from llvm-mc --- .../AMDGPU/gfx11_asm_vop3_from_vopc-fake16.s | 10486 +++++++++++++++ .../test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s | 8 +- llvm/test/MC/AMDGPU/gfx11_asm_vopc-fake16.s | 10948 ++++++++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vopc.s | 8 +- .../MC/AMDGPU/gfx11_asm_vopc_dpp16-fake16.s | 7172 ++++++++++ llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s | 8 +- .../MC/AMDGPU/gfx11_asm_vopc_dpp8-fake16.s | 1540 +++ llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s | 8 +- llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s | 2962 ++--- .../MC/AMDGPU/gfx11_asm_vopc_t16_promote.s | 2622 ++-- llvm/test/MC/AMDGPU/gfx11_asm_vopcx-fake16.s | 4106 ++++++ llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s | 4 +- .../MC/AMDGPU/gfx11_asm_vopcx_dpp16-fake16.s | 2690 ++++ llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s | 4 +- .../MC/AMDGPU/gfx11_asm_vopcx_dpp8-fake16.s | 578 + llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s | 4 +- llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s | 718 +- .../MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s | 718 +- llvm/test/MC/AMDGPU/gfx12_asm_vop3c-fake16.s | 8695 ++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s | 8 +- llvm/test/MC/AMDGPU/gfx12_asm_vopc-fake16.s | 9076 +++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vopc.s | 8 +- .../MC/AMDGPU/gfx12_asm_vopc_dpp16-fake16.s | 6052 +++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s | 8 +- .../MC/AMDGPU/gfx12_asm_vopc_dpp8-fake16.s | 1300 ++ llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s | 8 +- llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s | 2693 ++-- .../MC/AMDGPU/gfx12_asm_vopc_t16_promote.s | 3881 +++--- llvm/test/MC/AMDGPU/gfx12_asm_vopcx-fake16.s | 3404 
+++++ llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s | 4 +- .../MC/AMDGPU/gfx12_asm_vopcx_dpp16-fake16.s | 2270 ++++ llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s | 4 +- .../MC/AMDGPU/gfx12_asm_vopcx_dpp8-fake16.s | 488 + llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s | 4 +- llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s | 647 +- .../MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s | 647 +- .../Disassembler/AMDGPU/gfx11_dasm_vopc.txt | 6 +- .../AMDGPU/gfx11_dasm_vopc_dpp16.txt | 6 +- .../AMDGPU/gfx11_dasm_vopc_dpp8.txt | 6 +- .../Disassembler/AMDGPU/gfx11_dasm_vopcx.txt | 6 +- .../AMDGPU/gfx11_dasm_vopcx_dpp16.txt | 6 +- .../AMDGPU/gfx11_dasm_vopcx_dpp8.txt | 6 +- .../Disassembler/AMDGPU/gfx12_dasm_vopc.txt | 6 +- .../AMDGPU/gfx12_dasm_vopc_dpp16.txt | 6 +- .../AMDGPU/gfx12_dasm_vopc_dpp8.txt | 6 +- .../Disassembler/AMDGPU/gfx12_dasm_vopcx.txt | 6 +- .../AMDGPU/gfx12_dasm_vopcx_dpp16.txt | 6 +- .../AMDGPU/gfx12_dasm_vopcx_dpp8.txt | 6 +- 48 files changed, 76343 insertions(+), 7510 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vopc-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vopcx-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vop3c-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vopc-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vopcx-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16-fake16.s create mode 100644 llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8-fake16.s diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc-fake16.s 
b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc-fake16.s new file mode 100644 index 000000000000000..faa2b1f97699971 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc-fake16.s @@ -0,0 +1,10486 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, v255, v2 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, s1, v2 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, s105, v255 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, vcc_lo, s2 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, vcc_hi, s105 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x6b,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, ttmp15, ttmp15 +// W32: encoding: 
[0x05,0x00,0x7d,0xd4,0x7b,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, m0, src_scc +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7d,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x7d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x7d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], v1, 0.5 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], v255, v2 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], s1, v2 +// W64: encoding: 
[0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], s105, v255 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], vcc_lo, s2 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], vcc_hi, s105 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x6b,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], ttmp15, ttmp15 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7b,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], m0, src_scc +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7d,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x7d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 ttmp[14:15], src_scc, vcc_lo 
+// W64: encoding: [0x7a,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi +// GFX11: encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cmp_class_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x7e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x7e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x7e,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x7e,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: 
[0x0a,0x00,0x7e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x7e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x7e,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x7e,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 null, -|0xaf123456|, vcc_hi +// GFX11: encoding: [0x7c,0x01,0x7e,0xd4,0xff,0xd6,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cmp_class_f64_e64 s5, v[1:2], v2 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, v[1:2], v255 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, v[1:2], s2 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, v[1:2], s105 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0xd3,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, v[254:255], ttmp15 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0xfe,0xf7,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, s[2:3], vcc_hi +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x02,0xd6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, s[104:105], vcc_lo +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x68,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, vcc, m0 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x6a,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, ttmp[14:15], exec_hi +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x7a,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, exec, exec_lo +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x7e,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s105, null, null +// W32: encoding: [0x69,0x00,0x7f,0xd4,0x7c,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x7f,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 vcc_hi, 0.5, 0.5 +// W32: encoding: [0x6b,0x00,0x7f,0xd4,0xf0,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 ttmp15, -|src_scc|, src_scc +// W32: encoding: [0x7b,0x01,0x7f,0xd4,0xfd,0xfa,0x01,0x20] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[1:2], v2 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[1:2], v255 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[1:2], s2 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[1:2], s105 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xd3,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[254:255], ttmp15 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0xfe,0xf7,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], s[2:3], vcc_hi +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x02,0xd6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], s[104:105], vcc_lo +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x68,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], vcc, m0 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x6a,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], ttmp[14:15], exec_hi +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7a,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], exec, exec_lo +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7e,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], null, null +// W64: encoding: 
[0x0a,0x00,0x7f,0xd4,0x7c,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x7f,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 vcc, 0.5, 0.5 +// W64: encoding: [0x6a,0x00,0x7f,0xd4,0xf0,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 ttmp[14:15], -|src_scc|, src_scc +// W64: encoding: [0x7a,0x01,0x7f,0xd4,0xfd,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456 +// GFX11: encoding: [0x7c,0x00,0x7f,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_eq_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x02,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x02,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x02,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x02,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x02,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x02,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_eq_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x02,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x02,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_eq_f32_e64 s5, v1, 
v2 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x12,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x12,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x12,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x12,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: 
[0x6a,0x00,0x12,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x12,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x12,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: 
[0x0a,0x00,0x12,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x12,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x12,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x12,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x12,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x12,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_eq_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x22,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x22,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x22,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x22,0xd4,0x6a,0xf4,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x22,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x22,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x22,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x22,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x22,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x22,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x22,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: 
[0x0a,0x00,0x22,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x22,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x22,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x22,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x22,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x22,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_eq_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x32,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x32,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x32,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x32,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x32,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x32,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x32,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x32,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_eq_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x32,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_eq_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x32,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x32,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x32,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_eq_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x42,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x42,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, exec_lo, -1 +// W32: encoding: 
[0x05,0x00,0x42,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x42,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x42,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x42,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x42,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x42,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: 
[0x0a,0x00,0x42,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x42,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x42,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x42,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x42,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_eq_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x52,0xd4,0xfe,0xfd,0x03,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x52,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x52,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x52,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x52,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x52,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x52,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x52,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x52,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x52,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0xfe,0xfd,0x03,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x52,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x52,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x52,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x52,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_eq_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3a,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3a,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_eq_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3a,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3a,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_eq_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3a,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3a,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3a,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x3a,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_eq_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: 
[0x05,0x00,0x4a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4a,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4a,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4a,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4a,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x4a,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4a,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4a,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 ttmp[14:15], src_scc, vcc_lo +// 
W64: encoding: [0x7a,0x00,0x4a,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x4a,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_eq_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5a,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5a,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5a,0xd4,0xf0,0xf8,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5a,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5a,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 vcc, 0.5, null +// W64: encoding: 
[0x6a,0x00,0x5a,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5a,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x5a,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_f_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x00,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x00,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x00,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x00,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x00,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x00,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x00,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x00,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
+ +v_cmp_f_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x00,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x00,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x00,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x00,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x00,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 
s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x00,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x00,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x00,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x00,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x00,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x00,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_f_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x10,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x10,0xd4,0x01,0x04,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x10,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x10,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x10,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x10,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x10,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x10,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x10,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x10,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x10,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x10,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x10,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x10,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x10,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x10,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_f_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x10,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x10,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x10,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x10,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_f_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x20,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x20,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x20,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x20,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x20,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x20,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_f_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x20,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x20,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x20,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x20,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x20,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x20,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x20,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x20,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x20,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x20,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_f_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x20,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x20,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x20,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x20,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x20,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_f_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x40,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x40,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x40,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x40,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x40,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x40,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, ttmp15, src_scc +// W32: encoding: 
[0x05,0x00,0x40,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x40,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x40,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x40,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x40,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x40,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x40,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x40,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x40,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x40,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x40,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x40,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: 
[0x7c,0x00,0x40,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_f_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x50,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x50,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x50,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x50,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x50,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x50,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x50,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x50,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x50,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x50,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x50,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_f_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x50,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x50,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x50,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x50,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x50,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x50,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x50,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x50,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x50,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x50,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x50,0xd4,0xfd,0xfc,0x00,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_i64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x50,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_f_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x48,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x48,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x48,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x48,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x48,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x48,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x48,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x48,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x48,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x48,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s105, null, exec_lo +// W32: 
encoding: [0x69,0x00,0x48,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x48,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x48,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x48,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x48,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: 
[0x0a,0x00,0x48,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x48,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x48,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x48,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x48,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_f_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x58,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x58,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x58,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x58,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x58,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x58,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x58,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x58,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x58,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x58,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x58,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x58,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x58,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x58,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x58,0xd4,0x68,0xd0,0x00,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x58,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x58,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x58,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x58,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x58,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x58,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x58,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_f_u64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x58,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ge_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x06,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ge_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x06,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x06,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x06,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x06,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x06,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], v1, v2 
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x06,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[104:105], -1, 
exec_hi +// W64: encoding: [0x68,0x00,0x06,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_ge_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x16,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x16,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x16,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x16,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x16,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x16,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x16,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_ge_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x16,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x16,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x16,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x16,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x16,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_ge_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ge_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x26,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x26,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x26,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x26,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x26,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x26,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x26,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x26,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x26,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x26,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_ge_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x26,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x26,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x26,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x26,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: 
[0x7c,0x82,0x26,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_ge_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x36,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x36,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x36,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x36,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x36,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ge_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x36,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x36,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x36,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ge_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x36,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x36,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x36,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x36,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_ge_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x46,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x6a,0xf6,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x46,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x46,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x46,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x46,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x46,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x46,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x46,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x46,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x46,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x46,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ge_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x56,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x56,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x56,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x56,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x56,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x56,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x56,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x56,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_ge_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x56,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x56,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x56,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x56,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x56,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x56,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ge_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, exec_lo, -1 +// W32: 
encoding: [0x05,0x00,0x3e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3e,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3e,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: 
[0x0a,0x00,0x3e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3e,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3e,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x3e,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_ge_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4e,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_ge_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4e,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_ge_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4e,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4e,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x4e,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ge_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, 
exec, src_scc +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5e,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5e,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5e,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5e,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
+ +v_cmp_ge_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5e,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5e,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5e,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x5e,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_gt_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x04,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: 
[0x05,0x00,0x04,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x04,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x04,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x04,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x04,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x04,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x04,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// 
W64: encoding: [0x7a,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_gt_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x14,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x14,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x14,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_gt_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x14,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x14,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x14,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x14,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x14,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x14,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x14,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x14,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x14,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_gt_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x24,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x24,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 
s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x24,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x24,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x24,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x24,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x24,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x24,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x24,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x24,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_gt_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x24,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x24,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x24,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x24,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x24,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_gt_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x34,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, s1, s2 
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x34,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x34,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x34,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x34,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x34,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x34,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: 
[0x7b,0x00,0x34,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], null, exec_lo +// W64: encoding: 
[0x0a,0x00,0x34,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x34,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x34,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x34,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x34,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_gt_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x44,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x44,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_gt_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x44,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x44,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x44,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x44,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x44,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], 
vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x44,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x44,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x44,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x44,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_gt_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: 
[0x05,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x54,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x54,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x54,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x54,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x54,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x54,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x54,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x54,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x54,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x54,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: 
[0x0a,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x54,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x54,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x54,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x54,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 
null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x54,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_gt_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3c,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_gt_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3c,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3c,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3c,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3c,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3c,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3c,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x3c,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_gt_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: 
[0x05,0x00,0x4c,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4c,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4c,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4c,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4c,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], v255, v255 +// W64: encoding: 
[0x0a,0x00,0x4c,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4c,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 vcc, 0.5, m0 +// W64: 
encoding: [0x6a,0x00,0x4c,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4c,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x4c,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_gt_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5c,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5c,0xd4,0xc1,0x82,0x01,0x00] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5c,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5c,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[104:105], -1, -1 +// W64: encoding: 
[0x68,0x00,0x5c,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5c,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5c,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x5c,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_le_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x03,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x03,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x03,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_le_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x03,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x03,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x03,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 
s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x03,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x03,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_le_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, v255, v255 +// W32: encoding: 
[0x05,0x00,0x13,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x13,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x13,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x13,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x13,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x13,0xd4,0xf0,0xfa,0x00,0x40] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x13,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: 
[0x0a,0x01,0x13,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x13,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x13,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x13,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x13,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_le_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x23,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x23,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x23,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x23,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: 
[0x05,0x00,0x23,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x23,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x23,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x23,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x23,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x23,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], 
ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x23,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x23,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x23,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x23,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x23,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_le_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x33,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: 
[0x05,0x00,0x33,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x33,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x33,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x33,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x33,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x33,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x33,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x33,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], v255, v255 +// W64: encoding: 
[0x0a,0x00,0x33,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x33,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 vcc, 0.5, m0 +// W64: encoding: 
[0x6a,0x00,0x33,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x33,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x33,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_le_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x43,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x43,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_le_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x43,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x43,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x43,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x43,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x43,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_le_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x43,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x43,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x43,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x43,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_le_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x53,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, s[2:3], s[4:5] +// W32: 
encoding: [0x05,0x00,0x53,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x53,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x53,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x53,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x53,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x53,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x53,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x53,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x53,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x53,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], s[2:3], s[4:5] 
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x53,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x53,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x53,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x53,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_le_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, v255, v255 +// W32: encoding: 
[0x05,0x00,0x3b,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3b,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3b,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3b,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3b,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3b,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3b,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3b,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x3b,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_le_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, 
ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4b,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4b,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4b,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4b,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], s105, s105 +// W64: encoding: 
[0x0a,0x00,0x4b,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4b,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4b,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4b,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 null, 0xaf123456, vcc_hi 
+// GFX11: encoding: [0x7c,0x00,0x4b,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_le_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5b,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5b,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5b,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5b,0xd4,0xfd,0xfc,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5b,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5b,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: 
[0x7a,0x00,0x5b,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x5b,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_lg_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x05,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x05,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x05,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x05,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_lg_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x05,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x05,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_lg_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x05,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x05,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_lg_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x15,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, s105, s105 +// W32: encoding: 
[0x05,0x00,0x15,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x15,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x15,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x15,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x15,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x15,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x15,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], v1, v2 +// W64: encoding: 
[0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x15,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[104:105], -1, exec_hi +// 
W64: encoding: [0x68,0x00,0x15,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x15,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x15,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x15,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_lg_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x25,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x25,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x25,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x25,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x25,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x25,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s105, null, 0.5 +// W32: encoding: 
[0x69,0x00,0x25,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x25,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x25,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x25,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x25,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_lg_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x25,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x25,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x25,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x25,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_lt_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x01,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x01,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, ttmp15, src_scc +// W32: encoding: 
[0x05,0x00,0x01,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x01,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x01,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x01,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x01,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x01,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x69,0xd2,0x00,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x01,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x01,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x01,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x01,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: 
[0x7c,0x83,0x01,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_lt_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x11,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x11,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x11,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x11,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_lt_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x11,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x11,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x11,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_lt_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x11,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x11,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x11,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x11,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x11,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_lt_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x21,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x21,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x21,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, vcc, 
ttmp[14:15] +// W32: encoding: [0x05,0x00,0x21,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x21,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x21,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x21,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x21,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x21,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x21,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_lt_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x21,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x21,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x21,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x21,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x21,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_lt_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x31,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, s105, s105 +// W32: encoding: 
[0x05,0x00,0x31,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x31,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x31,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x31,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x31,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x31,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x31,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x31,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], v1, v2 +// W64: encoding: 
[0x0a,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[104:105], -1, exec_hi +// W64: 
encoding: [0x68,0x00,0x31,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x31,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x31,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x31,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_lt_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x41,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x41,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_lt_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x41,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x41,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x41,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x41,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x41,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 
s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x41,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x41,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x41,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x41,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_lt_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, v[254:255], v[254:255] +// W32: 
encoding: [0x05,0x00,0x51,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x51,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x51,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x51,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x51,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x51,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x51,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x51,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x51,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x51,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: 
encoding: [0x0a,0x00,0x51,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x51,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x51,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x51,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x51,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_lt_u16_e64 s5, v1, v2 +// W32: encoding: 
[0x05,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x39,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x39,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x39,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x39,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x39,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x39,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x39,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x39,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x39,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x39,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x39,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x39,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_lt_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x49,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, vcc_hi, 0xaf123456 +// 
W32: encoding: [0x05,0x00,0x49,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x49,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x49,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x49,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x49,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x49,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x49,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x49,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x49,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x49,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 ttmp[14:15], src_scc, vcc_lo +// 
W64: encoding: [0x7a,0x00,0x49,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x49,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_lt_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x59,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x59,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x59,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x59,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x59,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x59,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x59,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x59,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x59,0xd4,0xf0,0xf8,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x59,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x59,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 vcc, 0.5, null +// W64: encoding: 
[0x6a,0x00,0x59,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x59,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x59,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ne_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x35,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x35,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x35,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_ne_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x35,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x35,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x35,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x35,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x35,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ne_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x35,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x35,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x35,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 null, 0xfe0b, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x35,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_ne_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x45,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x01,0x04,0x00,0x00] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x45,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x45,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x45,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x45,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x45,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x45,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x45,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x45,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x45,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x45,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ne_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x55,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x55,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x55,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x55,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x55,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x55,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_ne_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x55,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x55,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x55,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x55,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x55,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x55,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x55,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x55,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ne_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, ttmp15, src_scc 
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3d,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3d,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], s105, s105 +// W64: encoding: 
[0x0a,0x00,0x3d,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3d,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3d,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 null, 0xfe0b, vcc_hi +// 
GFX11: encoding: [0x7c,0x00,0x3d,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_ne_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_ne_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4d,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4d,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_ne_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4d,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4d,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x4d,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ne_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, vcc, ttmp[14:15] +// 
W32: encoding: [0x05,0x00,0x5d,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5d,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5d,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5d,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5d,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], 
vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5d,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5d,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5d,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x5d,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_neq_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x69,0xd2,0x00,0x00] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: 
[0x68,0x00,0x0d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_neq_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1d,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1d,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_neq_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1d,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1d,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x1d,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_neq_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_neq_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2d,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2d,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2d,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2d,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2d,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_neq_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2d,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2d,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2d,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2d,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: 
[0x7c,0x82,0x2d,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_nge_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x09,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x09,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x09,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x09,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x09,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_nge_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x09,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_nge_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x09,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x09,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_nge_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x19,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: 
[0x05,0x00,0x19,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x19,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x19,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x19,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x19,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x19,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x19,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], v255, v255 +// W64: encoding: 
[0x0a,0x00,0x19,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x19,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x19,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 vcc, 0.5, -m0 +// 
W64: encoding: [0x6a,0x00,0x19,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x19,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x19,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_nge_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x29,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x29,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x29,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x29,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x29,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x29,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x29,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 vcc_lo, -1, -1 +// W32: encoding: 
[0x6a,0x00,0x29,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x29,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x29,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x29,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
+ +v_cmp_nge_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x29,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x29,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x29,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x29,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_ngt_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, m0, 0.5 +// W32: encoding: 
[0x05,0x00,0x0b,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0b,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0b,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0b,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: 
[0x0a,0x00,0x0b,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0b,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0b,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_ngt_f32_e64 s5, v1, v2 +// W32: encoding: 
[0x05,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1b,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1b,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: 
[0x6a,0x00,0x1b,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1b,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1b,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], exec_lo, -1 +// W64: 
encoding: [0x0a,0x00,0x1b,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1b,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1b,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1b,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1b,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x1b,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_ngt_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: 
[0x05,0x00,0x2b,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2b,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2b,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2b,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2b,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2b,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 
s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2b,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2b,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2b,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2b,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x2b,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_nle_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, s105, s105 +// W32: encoding: 
[0x05,0x00,0x0c,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0c,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0c,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0c,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], v1, v2 +// W64: encoding: 
[0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0c,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[104:105], -1, exec_hi 
+// W64: encoding: [0x68,0x00,0x0c,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_nle_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1c,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1c,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1c,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1c,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1c,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1c,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1c,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1c,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1c,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x1c,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_nle_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_nle_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2c,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2c,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2c,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2c,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2c,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2c,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2c,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2c,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2c,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: 
[0x7c,0x82,0x2c,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_nlg_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0a,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0a,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_nlg_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0a,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_nlg_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0a,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0a,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_nlg_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: 
[0x05,0x00,0x1a,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1a,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1a,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1a,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1a,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1a,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], v255, v255 +// W64: encoding: 
[0x0a,0x00,0x1a,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1a,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1a,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 vcc, 0.5, -m0 +// 
W64: encoding: [0x6a,0x00,0x1a,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1a,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x1a,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_nlg_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2a,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2a,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 vcc_lo, -1, -1 +// W32: encoding: 
[0x6a,0x00,0x2a,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2a,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2a,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2a,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
+ +v_cmp_nlg_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2a,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2a,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2a,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x2a,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_nlt_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, m0, 0.5 +// W32: encoding: 
[0x05,0x00,0x0e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: 
[0x0a,0x00,0x0e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_nlt_f32_e64 s5, v1, v2 +// W32: encoding: 
[0x05,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: 
[0x6a,0x00,0x1e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1e,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1e,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], exec_lo, -1 +// W64: 
encoding: [0x0a,0x00,0x1e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1e,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1e,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x1e,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_nlt_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: 
[0x05,0x00,0x2e,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2e,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2e,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2e,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2e,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2e,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 
s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2e,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2e,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2e,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2e,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x2e,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_o_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x07,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, s105, s105 +// W32: encoding: 
[0x05,0x00,0x07,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x07,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x07,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x07,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x07,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x07,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x07,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x07,0xd4,0xc1,0xfe,0x00,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_o_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x17,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x17,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, 
exec_lo, -1 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x17,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x17,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x17,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x17,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x17,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: 
[0x0a,0x00,0x17,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x17,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x17,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x17,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x17,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x17,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_o_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: 
[0x05,0x00,0x27,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x27,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x27,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x27,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x27,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x27,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x27,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x27,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x27,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x27,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: 
[0x0a,0x00,0x27,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x27,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x27,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x27,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x27,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x27,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_t_f16_e64 s5, v1, v2 +// W32: encoding: 
[0x05,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0f,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0f,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0f,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0f,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0f,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x0f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_t_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, vcc_hi, 
0xaf123456 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1f,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1f,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1f,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1f,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1f,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x1f,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1f,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1f,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1f,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: 
encoding: [0x7a,0x02,0x1f,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x1f,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_t_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2f,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2f,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2f,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2f,0xd4,0xf0,0xf8,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2f,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2f,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2f,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 vcc, 0.5, null +// W64: encoding: 
[0x6a,0x00,0x2f,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2f,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x2f,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_t_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x47,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x47,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x47,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x47,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x47,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x47,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x47,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x47,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x47,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_t_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x47,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x47,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x47,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x47,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x47,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x47,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
+ +v_cmp_t_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x47,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x47,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x47,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x47,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_t_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x57,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x57,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: 
[0x05,0x00,0x57,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x57,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x57,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x57,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x57,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x57,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x57,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x57,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x57,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x57,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x57,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: 
[0x0a,0x00,0x57,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x57,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x57,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x57,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x57,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x57,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x57,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x57,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x57,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_i64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x57,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_t_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4f,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4f,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4f,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4f,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4f,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_t_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4f,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_t_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4f,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4f,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4f,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u32_e64 null, 0xaf123456, vcc_hi +// GFX11: encoding: [0x7c,0x00,0x4f,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_t_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5f,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5f,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5f,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5f,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s5, exec, src_scc 
+// W32: encoding: [0x05,0x00,0x5f,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5f,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5f,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5f,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5f,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5f,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 
s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5f,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5f,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5f,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_t_u64_e64 null, 0xaf123456, vcc +// GFX11: encoding: [0x7c,0x00,0x5f,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_tru_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0f,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0f,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0f,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_tru_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0f,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0f,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x0f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_tru_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1f,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s105, 
null, exec_lo +// W32: encoding: [0x69,0x00,0x1f,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1f,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1f,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1f,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_tru_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1f,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1f,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1f,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1f,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x1f,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_tru_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s5, s[104:105], 
s[104:105] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2f,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2f,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2f,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2f,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2f,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_tru_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2f,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2f,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2f,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2f,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_tru_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x2f,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_u_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x08,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, s1, 
s2 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x08,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x08,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x08,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x08,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x08,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: 
[0x7b,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x08,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], null, exec_lo +// W64: encoding: 
[0x0a,0x00,0x08,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x08,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_u_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x18,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x18,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_u_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x18,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x18,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x18,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x18,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x18,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], 
vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x18,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x18,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x18,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x18,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX11: encoding: [0x7c,0x83,0x18,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_u_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: 
[0x05,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x28,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x28,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x28,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x28,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x28,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x28,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x28,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x28,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x28,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x28,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: 
[0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x28,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x28,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x28,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x28,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 null, 
0xaf123456, -|vcc| clamp +// GFX11: encoding: [0x7c,0x82,0x28,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s index d0e79c0aa3444bb..faa2b1f97699971 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_e64 s5, v1, v2 // W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc-fake16.s new file mode 100644 index 000000000000000..e896511e6e5c654 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc-fake16.s @@ -0,0 +1,10948 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 
-mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_e32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0xfa,0x7c] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0xfa,0x7c] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0xfc,0x7c] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: 
[0xff,0xfe,0xfd,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, -1, v2 +// W64: encoding: 
[0xc1,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0xfd,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, v[1:2], v2 +// W32: encoding: [0x01,0x05,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, v[254:255], v2 +// W32: encoding: [0xfe,0x05,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, s[2:3], v2 +// W32: encoding: [0x02,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, s[104:105], v2 +// W32: encoding: [0x68,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, vcc, v2 +// W32: encoding: [0x6a,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, ttmp[14:15], v2 +// W32: encoding: [0x7a,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, exec, v2 +// W32: encoding: [0x7e,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 
vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, v[1:2], v2 +// W64: encoding: [0x01,0x05,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, v[254:255], v2 +// W64: encoding: [0xfe,0x05,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, s[2:3], v2 +// W64: encoding: [0x02,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, s[104:105], v2 +// W64: encoding: [0x68,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, vcc, v2 +// W64: encoding: [0x6a,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, ttmp[14:15], v2 +// W64: encoding: [0x7a,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, exec, v2 +// W64: encoding: [0x7e,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_class_f64 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 
vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, vcc_hi, v2 +// W64: 
encoding: [0x6b,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x24,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x25,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_eq_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x25,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x45,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x44,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x45,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x64,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x64,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x64,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_eq_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x85,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_eq_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x84,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x85,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, 0.5, v[2:3] +// 
W32: encoding: [0xf0,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa5,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_eq_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa5,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_eq_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x74,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, ttmp15, v2 +// 
W64: encoding: [0x7b,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x74,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x94,0x7c] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x95,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x94,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_eq_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x95,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb4,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x00,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x00,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_f_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x00,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, 
vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x20,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x21,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v255, v2 +// W64: 
encoding: [0xff,0x05,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x20,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x21,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x40,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x40,0x7c] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x41,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, src_scc, v[2:3] +// W64: encoding: 
[0xfd,0x04,0x40,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x41,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, null, v2 +// W32: encoding: 
[0x7c,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x80,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x81,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x80,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x80,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x81,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa0,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa1,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, vcc, v[2:3] +// W64: encoding: 
[0x6a,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa1,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: 
[0x6a,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x90,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x91,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x90,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x90,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_f_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x91,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xb0,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb1,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb0,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb0,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb1,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x0c,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_ge_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x2d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 
vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, 0xaf123456, v255 +// W64: encoding: 
[0xff,0xfe,0x2d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_ge_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x4d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_ge_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x4d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_ge_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x6c,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, 
exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x6c,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: 
[0x6b,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x8d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x8c,0x7c] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x8d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: 
[0xff,0xfc,0xad,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, 0xaf123456, 
v[254:255] +// W64: encoding: [0xff,0xfc,0xad,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, -1, v2 +// W32: 
encoding: [0xc1,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x7c,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x7c,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x7c,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x9d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_ge_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x9d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_ge_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: 
[0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, -1, v2 +// W32: encoding: 
[0xc1,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x08,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x29,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_gt_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x29,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 
vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x49,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x49,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x68,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_gt_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x68,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, 
ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x89,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, s105, v2 +// W64: 
encoding: [0x69,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x89,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa8,0x7c] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa9,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, 
v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa9,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_gt_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, 0.5, v2 +// 
W32: encoding: [0xf0,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x78,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x78,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x78,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x98,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x99,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x99,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_gt_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb8,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 +// W32: encoding: 
[0x01,0x05,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x06,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_le_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_le_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x27,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, 
vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x27,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, v[254:255], v[2:3] +// W32: 
encoding: [0xfe,0x05,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x47,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_le_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x47,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_le_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 
vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x66,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, null, v2 +// W64: encoding: 
[0x7c,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x66,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x86,0x7c] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x87,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x86,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x87,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xa6,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa7,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, v[254:255], v[2:3] +// 
W64: encoding: [0xfe,0x05,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa7,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, 
v127, v2 +// W32: encoding: [0x7f,0x05,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, src_scc, v2 +// W32: encoding: 
[0xfd,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x76,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x76,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x76,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x97,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_le_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x97,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_le_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x0a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_lg_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, exec_lo, 
v2 +// W32: encoding: [0x7e,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x2b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x2b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x4b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_lg_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x4b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_lt_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, 
0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x02,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x02,0x7c] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x02,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x22,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x23,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x23,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x43,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x43,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x62,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x62,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x62,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x82,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x83,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x83,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa3,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa3,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x72,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x72,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x72,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x92,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x93,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x93,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb3,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb3,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x6a,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x6a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x6a,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x8a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x8b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x8b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xab,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xab,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x7a,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x7a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x7a,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x9a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x9b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x9b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x1a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x3a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x3b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x3a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x3b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x5a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, s[2:3], 
v[2:3] +// W64: encoding: [0x02,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_nge_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, 
0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, -1, v2 +// W64: encoding: 
[0xc1,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, exec_lo, v2 +// W32: encoding: 
[0x7e,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x33,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x33,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nge_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, -1, v2 
+// W64: encoding: [0xc1,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, exec_lo, v2 +// W32: 
encoding: [0x7e,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x37,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x37,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ngt_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nle_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, -1, v2 
+// W64: encoding: [0xc1,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, exec_lo, v2 +// W32: 
encoding: [0x7e,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x39,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x39,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nle_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, -1, v2 
+// W64: encoding: [0xc1,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, exec_lo, v2 +// W32: 
encoding: [0x7e,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x35,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x35,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nlg_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, -1, v2 
+// W64: encoding: [0xc1,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, exec_lo, v2 +// W32: 
encoding: [0x7e,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x3d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x3d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nlt_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_o_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, 
0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x0e,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x2f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_o_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x2f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 
vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_o_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_t_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_t_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x1e,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x3f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_t_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x3f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_t_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_t_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_t_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x8e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x8f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_t_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x8e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x8e,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x8f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: 
[0xfd,0x04,0xae,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xaf,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, src_scc, v[2:3] +// 
W64: encoding: [0xfd,0x04,0xae,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xaf,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, null, v2 +// W32: encoding: 
[0x7c,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x9e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x9f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x9e,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x9e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x9f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xbe,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xbe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xbf,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, vcc, v[2:3] +// W64: encoding: 
[0x6a,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xbe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xbf,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: 
[0x6a,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x1e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v127, v2 +// W64: encoding: 
[0x7f,0x05,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x1e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x3e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x3f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_tru_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x3e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x3f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x5e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, vcc, v[2:3] +// W64: encoding: 
[0x6a,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x5e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: 
[0x6a,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x10,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_u_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, 
-1, v2 +// W32: encoding: [0xc1,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x31,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x30,0x7c] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x31,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: 
[0x7a,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x51,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, ttmp[14:15], 
v[2:3] +// W64: encoding: [0x7a,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x51,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s index 5349362b8fbaf42..05ed37c612ba386 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 
-mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_e32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0xfa,0x7c] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16-fake16.s new file mode 100644 index 000000000000000..aa315a16b9838fb --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16-fake16.s @@ -0,0 +1,7172 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 
row_mirror +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 
vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_half_mirror +// 
W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_class_f32 vcc_lo, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_class_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// 
W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x25,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: 
[0xfa,0x04,0x24,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x5f,0x01,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x25,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: 
[0xfa,0x04,0x64,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x64,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_eq_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x64,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x85,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x01,0x01,0xff] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x85,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// 
W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: 
[0xfa,0x04,0x74,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x74,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_eq_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x74,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: 
[0xfa,0x04,0x94,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_f_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x21,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 quad_perm:[3,2,1,0] 
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x21,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: 
[0xfa,0x04,0x80,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x81,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x81,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 
vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x91,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: 
[0xfa,0xfe,0x91,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 
row_shl:15 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_ge_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x2d,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x2d,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: 
[0xfa,0x04,0x6c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x6c,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_ror:1 +// W64: 
encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x6c,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: 
[0xfa,0xfe,0x8d,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x8d,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: 
[0xfa,0x04,0x7c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x7c,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_share:15 row_mask:0x0 
bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x7c,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x40,0x01,0xff] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x60,0x09,0x13] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, 
v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 
row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] 
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: 
[0xfa,0x04,0x68,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x68,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_shr:15 +// 
W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x68,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x89,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x89,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x78,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x78,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: 
encoding: [0xfa,0x04,0x78,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x78,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: 
[0xfa,0x04,0x98,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x5f,0x01,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: 
[0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x27,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: 
[0xfa,0x04,0x26,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x27,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_le_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x66,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x66,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: 
[0xfa,0x04,0x86,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: 
[0xfa,0x04,0x86,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x87,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_shr:15 +// 
W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x87,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x76,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x76,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x96,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_le_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: 
encoding: [0xfa,0x04,0x96,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: 
[0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: 
[0xfa,0x04,0x2a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x2b,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_lg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x2b,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x02,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: 
[0xfa,0x04,0x02,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x02,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_lt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x23,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x23,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: 
[0xfa,0x04,0x62,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: 
[0xfa,0x04,0x62,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x62,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_shr:15 +// 
W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x62,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x83,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x83,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x72,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x72,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: 
encoding: [0xfa,0x04,0x72,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x72,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x93,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: 
[0xfa,0x04,0x92,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x5f,0x01,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x93,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: 
[0xfa,0x04,0x6a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x6a,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_ne_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x6a,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x8b,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x01,0x01,0xff] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x8b,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// 
W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: 
[0xfa,0x04,0x7a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x7a,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_ne_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x7a,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: 
[0xfa,0x04,0x9a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 
vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_neq_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: 
[0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, 
v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_mirror 
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 
fi:0 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 
row_ror:1 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: 
encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: 
[0xfa,0x04,0x36,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: 
[0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: 
[0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nle_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0xe4,0x00,0xff] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x5f,0x01,0x01] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: 
[0xfa,0x04,0x38,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 
vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_nlg_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: 
[0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, 
v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_mirror 
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 
fi:0 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1 +// W32: 
encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x0f,0x01,0xff] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] 
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: 
[0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_shr:15 +// W64: 
encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_t_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x01,0x01,0xff] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x8f,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 
quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x50,0x01,0xff] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x8f,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_shr:1 +// W32: 
encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x9f,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x9f,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: 
[0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, -|v127|, -|v127| row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_ror:15 +// 
W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: 
[0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: 
[0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, 
v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x31,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_u_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x31,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s index 1299d02c3c0a535..bc77f0c1967d00c 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 
-mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0] // W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8-fake16.s new file mode 100644 index 000000000000000..6f2f9e6704deeab --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8-fake16.s @@ -0,0 +1,1540 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x24,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v255, v255 
dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x25,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x24,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x25,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x64,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x64,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x64,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x64,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x64,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x64,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x84,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x84,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x85,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x84,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x84,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x85,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x74,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x74,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x74,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: 
[0xe9,0x04,0x74,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x74,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x74,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x94,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x94,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x94,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x94,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x00,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x00,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x20,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x20,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x21,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x20,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x20,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x21,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x80,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x80,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x81,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x80,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x80,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x81,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x90,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x90,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// 
W32: encoding: [0xe9,0xfe,0x91,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x90,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x90,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_f_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x91,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x2d,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x2d,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x6c,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x6c,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x8d,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x8d,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W32: encoding: [0xea,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x7c,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x7c,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x28,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: 
[0xe9,0xfe,0x29,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x28,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x29,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x68,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x68,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x68,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x68,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x68,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x68,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_gt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x88,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x88,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x89,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x88,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x88,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x89,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x78,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x78,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x78,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x78,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x78,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x78,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x98,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x98,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x98,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x98,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W32: encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x26,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x27,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x26,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_le_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x27,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x66,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x66,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x66,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x66,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x66,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x66,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x86,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x86,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: 
[0xe9,0xfe,0x87,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x86,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x86,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x87,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x76,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x76,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x76,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x76,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x76,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x76,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_le_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x96,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x96,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x96,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x96,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x2b,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x2b,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x02,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W32: encoding: [0xea,0x04,0x02,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x02,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x02,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x02,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x02,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x22,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x22,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x23,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x22,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x22,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_lt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x23,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x62,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x62,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x62,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x62,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x62,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x62,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x82,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x82,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: 
[0xe9,0xfe,0x83,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x82,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x82,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x83,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x72,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x72,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x72,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x72,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_lt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x92,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x92,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x93,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x92,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x92,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x93,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x6a,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x6a,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x8b,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x8b,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W32: encoding: [0xea,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x7a,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x7a,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: 
[0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x32,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x32,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x36,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x36,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x38,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x38,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x34,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: 
encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x34,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W32: encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_t_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x8e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x8e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x8f,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x8e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x8e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x8f,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x9e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x9e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x9f,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x9e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x9e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_t_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x9f,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// 
W32: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_tru_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x30,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x31,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x30,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x31,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s index 9f10a29791ad1cb..1c333a0c909170b 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 
-show-encoding %s | FileCheck --check-prefix=W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s index 889293b1a0f2343..4b97d276d55616d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s @@ -1,1973 +1,1973 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s v_cmp_class_f16_e32 vcc, v1, v255 -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_class_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_class_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:40: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, 
v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:40: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for 
instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU 
or mode v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_i16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_i16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_i16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_i16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction 
v_cmp_eq_u16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_u16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_u16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + 
+v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_u16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_f_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_f_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction v_cmp_f_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_f_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_f_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for 
instruction + +v_cmp_f_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction v_cmp_f_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_f_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_f_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_f_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction v_cmp_f_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_f_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_f_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_f_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction v_cmp_f_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode 
v_cmp_f_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: 
error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_i16_e32 vcc, v127, 
v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_i16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_i16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 
quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_u16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + 
+v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_u16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_u16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_u16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode 
v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_gt_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_gt_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: 
error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_gt_i16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_gt_i16_e32 vcc, vcc_hi, 
v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_gt_i16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_i16_e32 vcc, v127, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_gt_i16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, vcc_hi, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc, vcc_lo, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_i16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc_lo, 
v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_i16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_u16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_gt_u16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode 
+ +v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_gt_u16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_u16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_u16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_u16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_u16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16_e32 vcc_lo, v1, 
v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v1, 
v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_i16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_i16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_i16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc_lo, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_i16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_u16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + 
+v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_u16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_u16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_u16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lg_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lg_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e32 vcc_lo, v1, 
v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lg_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v1, 
v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_i16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + 
+v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_i16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_i16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_i16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_u16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_u16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e32 vcc_lo, v1, 
v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_u16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_u16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_ne_i16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_ne_i16_e32 vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_ne_i16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, vcc_hi, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ne_i16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid 
for this GPU or mode + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ne_i16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ne_u16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: 
error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ne_u16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ne_u16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ne_u16_e32 vcc_lo, 
vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_neq_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_neq_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: 
operands are not valid for this GPU or mode v_cmp_neq_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_neq_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
:[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nge_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nge_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nge_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: 
error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_ngt_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 
vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_ngt_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_ngt_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid 
operand for instruction v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nle_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nle_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nle_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 
vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nlg_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nlg_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nlg_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nlt_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:35: error: invalid 
operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nlt_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nlt_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] 
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction v_cmp_o_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction v_cmp_o_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction v_cmp_o_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_t_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction -v_cmp_t_f16_e32 vcc, v127, v255 -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction -v_cmp_t_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction -v_cmp_t_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode - -v_cmp_u_f16_e32 vcc, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc_lo, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc_lo, v127, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_class_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_class_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_eq_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_eq_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_eq_i16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_eq_i16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_eq_u16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_eq_u16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_f_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - 
-v_cmp_f_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_i16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_i16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_u16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_u16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_i16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_i16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_u16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_u16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_i16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_i16_e32 vcc_lo, v128, v2 -// 
GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_u16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_u16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lg_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lg_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_i16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_i16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_u16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_u16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_i16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_i16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_u16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_u16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_neq_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_neq_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode - -v_cmp_nge_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nge_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ngt_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ngt_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nle_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nle_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlg_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlg_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlt_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlt_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_o_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_o_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_t_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, 
v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc_lo, v128, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// 
GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, 
v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v1, v255 
quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v127, v255 
quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// 
GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: 
invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v128, v2 
quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// 
GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_f_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand 
for instruction - -v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, v127, v255 +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction v_cmp_t_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_t_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_tru_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: 
:[[@LINE-1]]:28: error: invalid operand for instruction -v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction -v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction -v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction -v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, vcc_hi, v255 +// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc, vcc_lo, v255 +// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v1, v255 +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction 
-v_cmp_f_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmp_f_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v127, v255 +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction -v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_gt_f16_e32 vcc, 
v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v1, v255 +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v127, v255 +// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction -v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, vcc_hi, v255 +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc, vcc_lo, v255 +// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v1, v255 +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v127, v255 +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid 
operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction -v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction -v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction +v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, v127, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction -v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand 
for instruction +v_cmp_u_f16_e32 vcc, vcc_hi, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc, vcc_lo, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction -v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction -v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, v127, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_t_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction -v_cmp_t_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction -v_cmp_tru_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand 
for instruction +v_cmp_u_f16_e32 vcc_lo, v128, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_tru_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction -v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction -v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s index 75f20b0c7f0c4c2..49a3f8ad63e7ec6 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s @@ -1,1973 +1,1973 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s v_cmp_class_f16 vcc, v1, v255 -// GFX11: v_cmp_class_f16_e64 +// GFX11: v_cmp_class_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x01,0xff,0x03,0x00] -v_cmp_class_f16 vcc, v127, v255 -// GFX11: v_cmp_class_f16_e64 +v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_class_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_class_f16_e64 +v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_class_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_class_f16_e64 +v_cmp_class_f16 vcc, v127, v255 +// GFX11: v_cmp_class_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00] v_cmp_class_f16 vcc, v127, v255 -// GFX11: v_cmp_class_f16_e64 +// GFX11: v_cmp_class_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_class_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_class_f16_e64 +v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_class_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_class_f16_e64 +v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_eq_f16 vcc, v1, v255 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_eq_f16 vcc, v127, v255 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_eq_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_class_f16 vcc, v128, v2 +// GFX11: v_cmp_class_f16_e64 vcc, v128, v2 ; encoding: 
[0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00] -v_cmp_eq_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_class_f16 vcc, v128, v2 +// GFX11: v_cmp_class_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00] -v_cmp_eq_f16 vcc, v1, v255 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_eq_f16 vcc, v127, v255 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_eq_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_eq_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_eq_i16 vcc, v1, v255 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_class_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v127, v255 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_class_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_class_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_class_f16 vcc, vcc_lo, v255 +// 
GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v1, v255 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_eq_f16 vcc, v1, v255 +// GFX11: v_cmp_eq_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_i16 vcc, v127, v255 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_eq_f16 vcc, v1, v255 +// GFX11: v_cmp_eq_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, v1, v255 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_eq_u16 vcc, v127, v255 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_eq_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_eq_f16 vcc, v127, v255 +// GFX11: v_cmp_eq_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_eq_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_eq_f16 vcc, v127, v255 +// GFX11: v_cmp_eq_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_eq_u16 vcc, v1, v255 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, v127, v255 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_eq_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_f_f16 vcc, v1, v255 -// GFX11: v_cmp_f_f16_e64 +v_cmp_eq_f16 vcc, v128, v2 +// GFX11: v_cmp_eq_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00] -v_cmp_f_f16 vcc, v127, v255 -// GFX11: v_cmp_f_f16_e64 +v_cmp_eq_f16 vcc, v128, v2 +// GFX11: v_cmp_eq_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00] -v_cmp_f_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_f_f16_e64 +v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_f_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_f_f16_e64 +v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_f_f16 vcc, v1, v255 -// GFX11: v_cmp_f_f16_e64 +v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_f_f16 vcc, v127, v255 -// GFX11: v_cmp_f_f16_e64 +v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_f_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_f_f16_e64 +v_cmp_eq_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_f_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_f_f16_e64 +v_cmp_eq_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ge_f16 vcc, v1, v255 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_eq_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ge_f16 vcc, v127, v255 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_eq_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ge_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_eq_i16 vcc, v1, v255 +// GFX11: v_cmp_eq_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ge_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_eq_i16 vcc, v1, v255 +// GFX11: v_cmp_eq_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ge_f16 vcc, v1, v255 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_ge_f16 vcc, v127, v255 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] 
-v_cmp_ge_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_ge_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_ge_i16 vcc, v1, v255 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_eq_i16 vcc, v127, v255 +// GFX11: v_cmp_eq_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_ge_i16 vcc, v127, v255 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_eq_i16 vcc, v127, v255 +// GFX11: v_cmp_eq_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_ge_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_i16 vcc, v1, v255 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_i16 vcc, v127, v255 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] 
-v_cmp_ge_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_eq_i16 vcc, v128, v2 +// GFX11: v_cmp_eq_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x32,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_eq_i16 vcc, v128, v2 +// GFX11: v_cmp_eq_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x32,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_u16 vcc, v1, v255 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ge_u16 vcc, v127, v255 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ge_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ge_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ge_u16 vcc, v1, v255 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_eq_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ge_u16 vcc, v127, v255 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_eq_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ge_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_eq_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x6a,0xfe,0x03,0x00] 
-v_cmp_ge_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_eq_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_gt_f16 vcc, v1, v255 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_eq_u16 vcc, v1, v255 +// GFX11: v_cmp_eq_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00] -v_cmp_gt_f16 vcc, v127, v255 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_eq_u16 vcc, v1, v255 +// GFX11: v_cmp_eq_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00] -v_cmp_gt_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_gt_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_gt_f16 vcc, v1, v255 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_gt_f16 vcc, v127, v255 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_gt_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_eq_u16 vcc, v127, v255 +// GFX11: v_cmp_eq_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_gt_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_eq_u16 vcc, v127, v255 +// GFX11: v_cmp_eq_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_gt_i16 
vcc, v1, v255 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_gt_i16 vcc, v127, v255 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_gt_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_gt_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_gt_i16 vcc, v1, v255 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_eq_u16 vcc, v128, v2 +// GFX11: v_cmp_eq_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3a,0xd4,0x80,0x05,0x02,0x00] -v_cmp_gt_i16 vcc, v127, v255 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_eq_u16 vcc, v128, v2 +// GFX11: v_cmp_eq_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3a,0xd4,0x80,0x05,0x02,0x00] -v_cmp_gt_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_gt_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_gt_u16 vcc, v1, v255 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_eq_u16 vcc, 
v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_gt_u16 vcc, v127, v255 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_eq_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_gt_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_eq_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_gt_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_eq_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_eq_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_gt_u16 vcc, v1, v255 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_eq_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_gt_u16 vcc, v127, v255 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_eq_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_eq_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_gt_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_f_f16 vcc, v1, v255 +// GFX11: v_cmp_f_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00] -v_cmp_gt_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_f_f16 vcc, v1, v255 +// GFX11: v_cmp_f_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_f16 vcc, v1, v255 -// GFX11: v_cmp_le_f16_e64 +v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_le_f16 vcc, v127, v255 -// GFX11: v_cmp_le_f16_e64 +v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_le_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_le_f16_e64 +v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_le_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_le_f16_e64 +v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_le_f16 vcc, v1, v255 -// GFX11: v_cmp_le_f16_e64 +v_cmp_f_f16 vcc, v127, v255 +// GFX11: v_cmp_f_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x00,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_le_f16 vcc, v127, v255 -// GFX11: v_cmp_le_f16_e64 +v_cmp_f_f16 vcc, v127, v255 +// GFX11: v_cmp_f_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x00,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_le_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_le_f16_e64 +v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_le_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_le_f16_e64 +v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_le_i16 vcc, v1, v255 -// GFX11: v_cmp_le_i16_e64 +v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_le_i16 vcc, v127, v255 -// GFX11: v_cmp_le_i16_e64 +v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_le_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_le_i16_e64 +v_cmp_f_f16 vcc, v128, v2 +// GFX11: v_cmp_f_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x00,0xd4,0x80,0x05,0x02,0x00] -v_cmp_le_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_le_i16_e64 +v_cmp_f_f16 vcc, v128, v2 +// GFX11: v_cmp_f_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x00,0xd4,0x80,0x05,0x02,0x00] -v_cmp_le_i16 vcc, v1, v255 -// GFX11: v_cmp_le_i16_e64 +v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_le_i16 vcc, v127, v255 -// GFX11: v_cmp_le_i16_e64 +v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_le_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_le_i16_e64 +v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_le_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_le_i16_e64 +v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_le_u16 vcc, v1, v255 -// GFX11: v_cmp_le_u16_e64 +v_cmp_f_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x00,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v127, v255 -// GFX11: v_cmp_le_u16_e64 +v_cmp_f_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x00,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_le_u16_e64 +v_cmp_f_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255 ; 
encoding: [0x6a,0x00,0x00,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_le_u16_e64 +v_cmp_f_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x00,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v1, v255 -// GFX11: v_cmp_le_u16_e64 +v_cmp_ge_f16 vcc, v1, v255 +// GFX11: v_cmp_ge_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_u16 vcc, v127, v255 -// GFX11: v_cmp_le_u16_e64 +v_cmp_ge_f16 vcc, v1, v255 +// GFX11: v_cmp_ge_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_le_u16_e64 +v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_le_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_le_u16_e64 +v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, v1, v255 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lg_f16 vcc, v127, v255 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lg_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_ge_f16 vcc, v127, v255 +// GFX11: v_cmp_ge_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lg_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_ge_f16 vcc, v127, v255 +// GFX11: v_cmp_ge_f16_e64 vcc, v127, v255 ; encoding: 
[0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lg_f16 vcc, v1, v255 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, v127, v255 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_lg_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_lt_f16 vcc, v1, v255 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_ge_f16 vcc, v128, v2 +// GFX11: v_cmp_ge_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00] -v_cmp_lt_f16 vcc, v127, v255 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_ge_f16 vcc, v128, v2 +// GFX11: v_cmp_ge_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00] -v_cmp_lt_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_lt_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_lt_f16 vcc, 
v1, v255 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_lt_f16 vcc, v127, v255 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_lt_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_ge_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_lt_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_ge_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_lt_i16 vcc, v1, v255 -// GFX11: v_cmp_lt_i16_e64 +v_cmp_ge_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_lt_i16 vcc, v127, v255 -// GFX11: v_cmp_lt_i16_e64 +v_cmp_ge_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_lt_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_lt_i16_e64 +v_cmp_ge_i16 vcc, v1, v255 +// GFX11: v_cmp_ge_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00] -v_cmp_lt_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_lt_i16_e64 - -v_cmp_lt_i16 vcc, v1, v255 -// GFX11: v_cmp_lt_i16_e64 - -v_cmp_lt_i16 vcc, v127, v255 -// GFX11: v_cmp_lt_i16_e64 - -v_cmp_lt_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_lt_i16_e64 - -v_cmp_lt_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_lt_i16_e64 +v_cmp_ge_i16 vcc, v1, v255 +// GFX11: v_cmp_ge_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00] -v_cmp_lt_u16 vcc, v1, v255 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_ge_i16 vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lt_u16 vcc, v127, v255 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lt_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lt_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lt_u16 vcc, v1, v255 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_ge_i16 vcc, v127, v255 +// GFX11: v_cmp_ge_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_u16 vcc, v127, v255 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_ge_i16 vcc, v127, v255 +// GFX11: v_cmp_ge_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ne_i16 vcc, v1, v255 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, 
v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ne_i16 vcc, v127, v255 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ne_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_ge_i16 vcc, v128, v2 +// GFX11: v_cmp_ge_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x36,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ne_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_ge_i16 vcc, v128, v2 +// GFX11: v_cmp_ge_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x36,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ne_i16 vcc, v1, v255 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ne_i16 vcc, v127, v255 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ne_i16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ne_i16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ne_u16 vcc, v1, v255 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_ge_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_i16_e64 vcc, vcc_hi, v255 ; encoding: 
[0x6a,0x00,0x36,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ne_u16 vcc, v127, v255 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_ge_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ne_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_ge_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ne_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_ge_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ne_u16 vcc, v1, v255 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_ge_u16 vcc, v1, v255 +// GFX11: v_cmp_ge_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ne_u16 vcc, v127, v255 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_ge_u16 vcc, v1, v255 +// GFX11: v_cmp_ge_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ne_u16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_ne_u16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_neq_f16 vcc, v1, v255 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_neq_f16 vcc, v127, v255 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_neq_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_ge_u16 vcc, v127, v255 +// GFX11: v_cmp_ge_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_neq_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_ge_u16 vcc, v127, v255 +// GFX11: v_cmp_ge_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_neq_f16 vcc, v1, v255 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_neq_f16 vcc, v127, v255 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_neq_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_neq_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_nge_f16 vcc, v1, v255 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_ge_u16 vcc, v128, v2 +// GFX11: v_cmp_ge_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3e,0xd4,0x80,0x05,0x02,0x00] -v_cmp_nge_f16 vcc, v127, v255 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_ge_u16 vcc, v128, v2 +// GFX11: v_cmp_ge_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3e,0xd4,0x80,0x05,0x02,0x00] -v_cmp_nge_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_nge_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_nge_f16 vcc, v1, v255 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nge_f16 vcc, v127, v255 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ge_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nge_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_ge_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nge_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_ge_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ge_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ngt_f16 vcc, v1, v255 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_ge_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ngt_f16 vcc, v127, v255 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_ge_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ge_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ngt_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_gt_f16 vcc, v1, v255 +// GFX11: v_cmp_gt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ngt_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_gt_f16 vcc, v1, v255 +// 
GFX11: v_cmp_gt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ngt_f16 vcc, v1, v255 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_ngt_f16 vcc, v127, v255 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_ngt_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_ngt_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nle_f16 vcc, v1, v255 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_gt_f16 vcc, v127, v255 +// GFX11: v_cmp_gt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_nle_f16 vcc, v127, v255 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_gt_f16 vcc, v127, v255 +// GFX11: v_cmp_gt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_nle_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_nle_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_nle_f16 vcc, v1, v255 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_nle_f16 vcc, v127, v255 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_nle_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_gt_f16 vcc, v128, v2 +// GFX11: v_cmp_gt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00] -v_cmp_nle_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_gt_f16 vcc, v128, v2 +// GFX11: v_cmp_gt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00] -v_cmp_nlg_f16 vcc, v1, v255 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_nlg_f16 vcc, v127, v255 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_nlg_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nlg_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nlg_f16 vcc, v1, v255 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_gt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nlg_f16 vcc, v127, v255 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_gt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nlg_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_gt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nlg_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_gt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nlt_f16 vcc, v1, v255 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_gt_i16 vcc, v1, v255 +// GFX11: v_cmp_gt_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlt_f16 vcc, v127, v255 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_gt_i16 vcc, v1, v255 +// GFX11: v_cmp_gt_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlt_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlt_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlt_f16 vcc, v1, v255 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nlt_f16 
vcc, v127, v255 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nlt_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_gt_i16 vcc, v127, v255 +// GFX11: v_cmp_gt_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_nlt_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_gt_i16 vcc, v127, v255 +// GFX11: v_cmp_gt_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_o_f16 vcc, v1, v255 -// GFX11: v_cmp_o_f16_e64 +v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_o_f16 vcc, v127, v255 -// GFX11: v_cmp_o_f16_e64 +v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_o_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_o_f16_e64 +v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_o_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_o_f16_e64 +v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_o_f16 vcc, v1, v255 -// GFX11: v_cmp_o_f16_e64 +v_cmp_gt_i16 vcc, v128, v2 +// GFX11: v_cmp_gt_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x34,0xd4,0x80,0x05,0x02,0x00] -v_cmp_o_f16 vcc, v127, v255 -// GFX11: v_cmp_o_f16_e64 +v_cmp_gt_i16 vcc, v128, v2 +// GFX11: v_cmp_gt_i16_e64 vcc, 
v128, v2 ; encoding: [0x6a,0x00,0x34,0xd4,0x80,0x05,0x02,0x00] -v_cmp_o_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_o_f16_e64 +v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_o_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_o_f16_e64 +v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_t_f16 vcc, v1, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_t_f16 vcc, v127, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_t_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_t_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_t_f16 vcc, v1, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_t_f16 vcc, v127, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_t_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v1, v255 +// GFX11: v_cmp_gt_u16_e64 vcc, v1, v255 ; 
encoding: [0x6a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00] -v_cmp_t_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v1, v255 +// GFX11: v_cmp_gt_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00] -v_cmp_tru_f16 vcc, v1, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_tru_f16 vcc, v127, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_tru_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_tru_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_tru_f16 vcc, v1, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v127, v255 +// GFX11: v_cmp_gt_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_tru_f16 vcc, v127, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v127, v255 +// GFX11: v_cmp_gt_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_tru_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_tru_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_t_f16_e64 +v_cmp_gt_u16 vcc, v127, 
v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_u_f16 vcc, v1, v255 -// GFX11: v_cmp_u_f16_e64 +v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_u_f16 vcc, v127, v255 -// GFX11: v_cmp_u_f16_e64 +v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_u_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_u_f16_e64 +v_cmp_gt_u16 vcc, v128, v2 +// GFX11: v_cmp_gt_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3c,0xd4,0x80,0x05,0x02,0x00] -v_cmp_u_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_u_f16_e64 +v_cmp_gt_u16 vcc, v128, v2 +// GFX11: v_cmp_gt_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3c,0xd4,0x80,0x05,0x02,0x00] -v_cmp_u_f16 vcc, v1, v255 -// GFX11: v_cmp_u_f16_e64 +v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_u_f16 vcc, v127, v255 -// GFX11: v_cmp_u_f16_e64 +v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_u_f16 vcc, vcc_hi, v255 -// GFX11: v_cmp_u_f16_e64 +v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_u_f16 vcc, vcc_lo, v255 -// GFX11: v_cmp_u_f16_e64 +v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_gt_u16_e64_dpp vcc, 
v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_class_f16 vcc, v128, v2 -// GFX11: v_cmp_class_f16_e64 +v_cmp_gt_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_class_f16 vcc, v128, v2 -// GFX11: v_cmp_class_f16_e64 +v_cmp_gt_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_gt_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_eq_f16 vcc, v128, v2 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_gt_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_f16 vcc, v128, v2 -// GFX11: v_cmp_eq_f16_e64 +v_cmp_gt_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_gt_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v128, v2 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_le_f16 vcc, v1, v255 +// GFX11: v_cmp_le_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_i16 vcc, v128, v2 -// GFX11: v_cmp_eq_i16_e64 +v_cmp_le_f16 vcc, v1, v255 +// GFX11: v_cmp_le_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_u16 vcc, v128, v2 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, v128, v2 -// GFX11: v_cmp_eq_u16_e64 +v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_f_f16 vcc, v128, v2 -// GFX11: v_cmp_f_f16_e64 +v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_f_f16 vcc, v128, v2 -// GFX11: v_cmp_f_f16_e64 +v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_ge_f16 vcc, v128, v2 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_le_f16 vcc, v127, v255 +// GFX11: v_cmp_le_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_ge_f16 vcc, v128, v2 -// GFX11: v_cmp_ge_f16_e64 +v_cmp_le_f16 vcc, v127, v255 +// GFX11: v_cmp_le_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_ge_i16 vcc, v128, v2 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_i16 vcc, v128, v2 -// GFX11: v_cmp_ge_i16_e64 +v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_u16 vcc, v128, v2 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_u16 vcc, v128, v2 -// GFX11: v_cmp_ge_u16_e64 +v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_gt_f16 vcc, v128, v2 -// GFX11: v_cmp_gt_f16_e64 +v_cmp_le_f16 vcc, v128, v2 +// GFX11: v_cmp_le_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00] -v_cmp_gt_f16 vcc, v128, v2 -// GFX11: v_cmp_gt_f16_e64 
+v_cmp_le_f16 vcc, v128, v2 +// GFX11: v_cmp_le_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00] -v_cmp_gt_i16 vcc, v128, v2 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_gt_i16 vcc, v128, v2 -// GFX11: v_cmp_gt_i16_e64 +v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_gt_u16 vcc, v128, v2 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_gt_u16 vcc, v128, v2 -// GFX11: v_cmp_gt_u16_e64 +v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_le_f16 vcc, v128, v2 -// GFX11: v_cmp_le_f16_e64 +v_cmp_le_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_f16 vcc, v128, v2 -// GFX11: v_cmp_le_f16_e64 +v_cmp_le_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_i16 vcc, v128, v2 -// GFX11: v_cmp_le_i16_e64 +v_cmp_le_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_i16 vcc, v128, v2 -// GFX11: v_cmp_le_i16_e64 +v_cmp_le_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v128, v2 -// GFX11: v_cmp_le_u16_e64 +v_cmp_le_i16 vcc, v1, 
v255 +// GFX11: v_cmp_le_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_u16 vcc, v128, v2 -// GFX11: v_cmp_le_u16_e64 +v_cmp_le_i16 vcc, v1, v255 +// GFX11: v_cmp_le_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00] -v_cmp_lg_f16 vcc, v128, v2 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, v128, v2 -// GFX11: v_cmp_lg_f16_e64 +v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lt_f16 vcc, v128, v2 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lt_f16 vcc, v128, v2 -// GFX11: v_cmp_lt_f16_e64 +v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lt_i16 vcc, v128, v2 -// GFX11: v_cmp_lt_i16_e64 +v_cmp_le_i16 vcc, v127, v255 +// GFX11: v_cmp_le_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_i16 vcc, v128, v2 -// GFX11: v_cmp_lt_i16_e64 +v_cmp_le_i16 vcc, v127, v255 +// GFX11: v_cmp_le_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_u16 vcc, v128, v2 -// GFX11: v_cmp_lt_u16_e64 +v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_u16 vcc, v128, v2 -// GFX11: 
v_cmp_lt_u16_e64 +v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ne_i16 vcc, v128, v2 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ne_i16 vcc, v128, v2 -// GFX11: v_cmp_ne_i16_e64 +v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ne_u16 vcc, v128, v2 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_le_i16 vcc, v128, v2 +// GFX11: v_cmp_le_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x33,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ne_u16 vcc, v128, v2 -// GFX11: v_cmp_ne_u16_e64 +v_cmp_le_i16 vcc, v128, v2 +// GFX11: v_cmp_le_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x33,0xd4,0x80,0x05,0x02,0x00] -v_cmp_neq_f16 vcc, v128, v2 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_neq_f16 vcc, v128, v2 -// GFX11: v_cmp_neq_f16_e64 +v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_nge_f16 vcc, v128, v2 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nge_f16 vcc, v128, v2 -// GFX11: v_cmp_nge_f16_e64 +v_cmp_le_i16 vcc, v128, v2 
quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ngt_f16 vcc, v128, v2 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_le_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ngt_f16 vcc, v128, v2 -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_le_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nle_f16 vcc, v128, v2 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_le_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nle_f16 vcc, v128, v2 -// GFX11: v_cmp_nle_f16_e64 +v_cmp_le_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nlg_f16 vcc, v128, v2 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_le_u16 vcc, v1, v255 +// GFX11: v_cmp_le_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlg_f16 vcc, v128, v2 -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_le_u16 vcc, v1, v255 +// GFX11: v_cmp_le_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlt_f16 vcc, v128, v2 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlt_f16 vcc, v128, v2 -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_o_f16 vcc, v128, v2 -// GFX11: v_cmp_o_f16_e64 +v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_o_f16 vcc, v128, v2 -// GFX11: v_cmp_o_f16_e64 +v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_t_f16 vcc, v128, v2 -// GFX11: v_cmp_t_f16_e64 +v_cmp_le_u16 vcc, v127, v255 +// GFX11: v_cmp_le_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_t_f16 vcc, v128, v2 -// GFX11: v_cmp_t_f16_e64 +v_cmp_le_u16 vcc, v127, v255 +// GFX11: v_cmp_le_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_tru_f16 vcc, v128, v2 -// GFX11: v_cmp_t_f16_e64 +v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_tru_f16 vcc, v128, v2 -// GFX11: v_cmp_t_f16_e64 +v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_u_f16 vcc, v128, v2 -// GFX11: v_cmp_u_f16_e64 +v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_u_f16 vcc, v128, v2 -// GFX11: v_cmp_u_f16_e64 +v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_le_u16 vcc, v128, v2 +// GFX11: v_cmp_le_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3b,0xd4,0x80,0x05,0x02,0x00] -v_cmp_class_f16 
vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_le_u16 vcc, v128, v2 +// GFX11: v_cmp_le_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3b,0xd4,0x80,0x05,0x02,0x00] -v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_le_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_le_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_le_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_le_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_le_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0] 
-// GFX11: v_cmp_eq_i16_e64 +v_cmp_le_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_le_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_lg_f16 vcc, v1, v255 +// GFX11: v_cmp_lg_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_lg_f16 vcc, v1, v255 +// GFX11: v_cmp_lg_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_lg_f16 vcc, v127, v255 +// GFX11: v_cmp_lg_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_lg_f16 vcc, v127, v255 +// 
GFX11: v_cmp_lg_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_lg_f16 vcc, v128, v2 +// GFX11: v_cmp_lg_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_lg_f16 vcc, v128, v2 +// GFX11: v_cmp_lg_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_lg_f16 
vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_lg_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_lg_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_lg_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_lg_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_lt_f16 vcc, v1, v255 +// GFX11: v_cmp_lt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00] -v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_lt_f16 vcc, v1, v255 +// GFX11: v_cmp_lt_f16_e64 vcc, v1, v255 ; encoding: 
[0x6a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00] -v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_lt_f16 vcc, v127, v255 +// GFX11: v_cmp_lt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_lt_f16 vcc, v127, v255 +// GFX11: v_cmp_lt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
v_cmp_lt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_lt_f16 vcc, v128, v2 +// GFX11: v_cmp_lt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x01,0xd4,0x80,0x05,0x02,0x00] -v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_lt_f16 vcc, v128, v2 +// GFX11: v_cmp_lt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x01,0xd4,0x80,0x05,0x02,0x00] -v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_le_i16 vcc, v1, v255 
quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_lt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_lt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_lt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_lt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_lt_i16 vcc, v1, v255 +// GFX11: v_cmp_lt_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00] -v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_lt_i16 vcc, v1, v255 +// GFX11: v_cmp_lt_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00] -v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_lt_i16 vcc, v127, v255 +// GFX11: v_cmp_lt_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_lt_i16 vcc, v127, v255 +// GFX11: v_cmp_lt_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 - -v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: 
v_cmp_lt_i16_e64 +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_lt_i16 vcc, v128, v2 +// GFX11: v_cmp_lt_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x31,0xd4,0x80,0x05,0x02,0x00] -v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_lt_i16 vcc, v128, v2 +// GFX11: v_cmp_lt_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x31,0xd4,0x80,0x05,0x02,0x00] -v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_lt_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 
+v_cmp_lt_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_lt_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_lt_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_lt_u16 vcc, v1, v255 +// GFX11: v_cmp_lt_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_lt_u16 vcc, v1, v255 +// GFX11: v_cmp_lt_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00] -v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_lt_u16 vcc, v127, v255 +// GFX11: v_cmp_lt_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_lt_u16 vcc, v127, v255 +// GFX11: v_cmp_lt_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_lt_u16 vcc, v128, v2 +// GFX11: v_cmp_lt_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x39,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_lt_u16 vcc, v128, v2 +// GFX11: v_cmp_lt_u16_e64 vcc, 
v128, v2 ; encoding: [0x6a,0x00,0x39,0xd4,0x80,0x05,0x02,0x00] -v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_lt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_lt_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_lt_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_lt_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_lt_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_lt_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_lt_u16_e64 vcc, vcc_lo, v255 ; encoding: 
[0x6a,0x00,0x39,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_ne_i16 vcc, v1, v255 +// GFX11: v_cmp_ne_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_ne_i16 vcc, v1, v255 +// GFX11: v_cmp_ne_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_ne_i16 vcc, v127, v255 +// GFX11: v_cmp_ne_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_ne_i16 vcc, v127, v255 +// GFX11: v_cmp_ne_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_t_f16 vcc, 
v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_ne_i16 vcc, v128, v2 +// GFX11: v_cmp_ne_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x35,0xd4,0x80,0x05,0x02,0x00] -v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_ne_i16 vcc, v128, v2 +// GFX11: v_cmp_ne_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x35,0xd4,0x80,0x05,0x02,0x00] -v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_ne_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ne_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_ne_i16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ne_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_ne_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ne_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_ne_i16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ne_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_ne_u16 vcc, v1, v255 +// GFX11: v_cmp_ne_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_ne_u16 vcc, v1, v255 +// GFX11: v_cmp_ne_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_ne_u16 vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_ne_u16 vcc, v127, v255 +// GFX11: v_cmp_ne_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_ne_u16 vcc, v127, v255 +// GFX11: v_cmp_ne_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_i16 vcc, v128, v2 
quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_ne_u16 vcc, v128, v2 +// GFX11: v_cmp_ne_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3d,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_ne_u16 vcc, v128, v2 +// GFX11: v_cmp_ne_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3d,0xd4,0x80,0x05,0x02,0x00] -v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ne_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_ne_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ne_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_ne_u16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ne_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_ne_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ne_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_ne_u16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ne_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_neq_f16 vcc, v1, v255 +// GFX11: v_cmp_neq_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_neq_f16 vcc, v1, v255 +// GFX11: v_cmp_neq_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// 
GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_neq_f16 vcc, v127, v255 +// GFX11: v_cmp_neq_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_neq_f16 vcc, v127, v255 +// GFX11: v_cmp_neq_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_neq_f16 vcc, v128, v2 +// GFX11: v_cmp_neq_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_neq_f16 vcc, v128, v2 +// GFX11: v_cmp_neq_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_neq_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_neq_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: 
v_cmp_ngt_f16_e64 +v_cmp_neq_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_neq_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_nge_f16 vcc, v1, v255 +// GFX11: v_cmp_nge_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_nge_f16 vcc, v1, v255 +// GFX11: v_cmp_nge_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_nge_f16 
vcc, v127, v255 +// GFX11: v_cmp_nge_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_nge_f16 vcc, v127, v255 +// GFX11: v_cmp_nge_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_nge_f16 vcc, v128, v2 +// GFX11: v_cmp_nge_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00] -v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_nge_f16 vcc, v128, v2 +// GFX11: v_cmp_nge_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00] -v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_nge_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_nge_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_nge_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_nge_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_ngt_f16 vcc, v1, v255 +// 
GFX11: v_cmp_ngt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_ngt_f16 vcc, v1, v255 +// GFX11: v_cmp_ngt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_ngt_f16 vcc, v127, v255 +// GFX11: v_cmp_ngt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_ngt_f16 vcc, v127, v255 +// GFX11: v_cmp_ngt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_ngt_f16 vcc, v128, v2 +// GFX11: v_cmp_ngt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_ngt_f16 vcc, v128, v2 +// GFX11: v_cmp_ngt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ge_i16 
vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_ngt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_ngt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_ngt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_ngt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_nle_f16 vcc, v1, v255 +// GFX11: v_cmp_nle_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00] -v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_nle_f16 vcc, v1, v255 +// GFX11: v_cmp_nle_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00] -v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: 
v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_nle_f16 vcc, v127, v255 +// GFX11: v_cmp_nle_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_nle_f16 vcc, v127, v255 +// GFX11: v_cmp_nle_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_gt_u16 
vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_nle_f16 vcc, v128, v2 +// GFX11: v_cmp_nle_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00] -v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_nle_f16 vcc, v128, v2 +// GFX11: v_cmp_nle_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00] -v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: 
v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_nle_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_nle_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_nle_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_nle_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_nlg_f16 vcc, v1, v255 +// GFX11: v_cmp_nlg_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_nlg_f16 vcc, v1, v255 +// GFX11: v_cmp_nlg_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_lg_f16 vcc, 
v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_nlg_f16 vcc, v127, v255 +// GFX11: v_cmp_nlg_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_nlg_f16 vcc, v127, v255 +// GFX11: v_cmp_nlg_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: 
v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_nlg_f16 vcc, v128, v2 +// GFX11: v_cmp_nlg_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00] -v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_nlg_f16 vcc, v128, v2 +// GFX11: v_cmp_nlg_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00] -v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_nlg_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
v_cmp_ne_i16_e64 +v_cmp_nlg_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_nlg_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_nlg_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_nlt_f16 vcc, v1, v255 +// GFX11: v_cmp_nlt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_nlt_f16 vcc, v1, v255 +// GFX11: v_cmp_nlt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: 
v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_nlt_f16 vcc, v127, v255 +// GFX11: v_cmp_nlt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_nlt_f16 vcc, v127, v255 +// GFX11: v_cmp_nlt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_nlt_f16 vcc, v128, v2 +// GFX11: v_cmp_nlt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ngt_f16 vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_nlt_f16 vcc, v128, v2 +// GFX11: v_cmp_nlt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_nlt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_nlt_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_nlt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nlg_f16 
vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_nlt_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_o_f16 vcc, v1, v255 +// GFX11: v_cmp_o_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_o_f16 vcc, v1, v255 +// GFX11: v_cmp_o_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_o_f16 vcc, v127, v255 +// GFX11: v_cmp_o_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_o_f16 vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_o_f16 vcc, v127, v255 +// GFX11: v_cmp_o_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00] v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_o_f16_e64 - -v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_o_f16 vcc, v128, v2 +// GFX11: v_cmp_o_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00] -v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_o_f16 vcc, v128, v2 +// GFX11: v_cmp_o_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00] -v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_tru_f16 vcc, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_o_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_o_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_o_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +v_cmp_o_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_t_f16 vcc, v1, v255 +// GFX11: v_cmp_t_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00] -v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_class_f16_e64 +v_cmp_t_f16 vcc, v1, v255 +// GFX11: 
v_cmp_t_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00] -v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_f16_e64 +v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_i16_e64 +v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_t_f16 vcc, v127, v255 +// GFX11: v_cmp_t_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_eq_u16_e64 +v_cmp_t_f16 vcc, v127, v255 +// GFX11: v_cmp_t_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_f_f16_e64 +v_cmp_t_f16 vcc, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_f16_e64 +v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_t_f16 vcc, v128, v2 +// GFX11: v_cmp_t_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_i16_e64 +v_cmp_t_f16 vcc, v128, v2 +// GFX11: v_cmp_t_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ge_u16_e64 +v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] 
-v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_f16_e64 +v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_t_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_i16_e64 +v_cmp_t_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_t_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_gt_u16_e64 +v_cmp_t_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_tru_f16 vcc, v1, v255 +// GFX11: v_cmp_t_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_f16_e64 +v_cmp_tru_f16 vcc, v1, v255 +// GFX11: v_cmp_t_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00] -v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_i16_e64 +v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_le_u16_e64 +v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_tru_f16 vcc, v127, v255 +// GFX11: v_cmp_t_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lg_f16_e64 +v_cmp_tru_f16 vcc, v127, v255 +// GFX11: v_cmp_t_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_f16_e64 +v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_lt_i16 vcc, v128, v2 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_i16_e64 +v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_tru_f16 vcc, v128, v2 +// GFX11: v_cmp_t_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00] -v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_lt_u16_e64 +v_cmp_tru_f16 vcc, v128, v2 +// GFX11: v_cmp_t_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00] -v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_i16_e64 +v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] -v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ne_u16_e64 +v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] -v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_tru_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255 ; encoding: 
[0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_neq_f16_e64 +v_cmp_tru_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00] -v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_tru_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nge_f16_e64 +v_cmp_tru_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00] -v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_u_f16 vcc, v1, v255 +// GFX11: v_cmp_u_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00] -v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_ngt_f16_e64 +v_cmp_u_f16 vcc, v1, v255 +// GFX11: v_cmp_u_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00] -v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nle_f16_e64 +v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlg_f16_e64 +v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
v_cmp_nlg_f16_e64 +v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_u_f16 vcc, v127, v255 +// GFX11: v_cmp_u_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_nlt_f16_e64 +v_cmp_u_f16 vcc, v127, v255 +// GFX11: v_cmp_u_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00] -v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_o_f16_e64 +v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] -v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] -v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_u_f16 vcc, v128, v2 +// GFX11: v_cmp_u_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00] -v_cmp_tru_f16 vcc, 
v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_t_f16_e64 +v_cmp_u_f16 vcc, v128, v2 +// GFX11: v_cmp_u_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00] v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmp_u_f16_e64 +// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] + +v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] + +v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] + +v_cmp_u_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00] + +v_cmp_u_f16 vcc, vcc_hi, v255 +// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00] + +v_cmp_u_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00] +v_cmp_u_f16 vcc, vcc_lo, v255 +// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx-fake16.s new file mode 100644 index 000000000000000..33a5e7c140b5f2e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx-fake16.s @@ -0,0 +1,4106 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn 
-mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s + +v_cmpx_class_f16_e32 v1, v2 +// GFX11: encoding: [0x01,0x05,0xfa,0x7d] + +v_cmpx_class_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0xfa,0x7d] + +v_cmpx_class_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0xfa,0x7d] + +v_cmpx_class_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0xfa,0x7d] + +v_cmpx_class_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0xfa,0x7d] + +v_cmpx_class_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0xfa,0x7d] + +v_cmpx_class_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0xfa,0x7d] + +v_cmpx_class_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0xfa,0x7d] + +v_cmpx_class_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0xfa,0x7d] + +v_cmpx_class_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0xfa,0x7d] + +v_cmpx_class_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0xfa,0x7d] + +v_cmpx_class_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0xfa,0x7d] + +v_cmpx_class_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0xfa,0x7d] + +v_cmpx_class_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0xfa,0x7d] + +v_cmpx_class_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfa,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_class_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0xfc,0x7d] + +v_cmpx_class_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0xfc,0x7d] + +v_cmpx_class_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0xfc,0x7d] + +v_cmpx_class_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0xfc,0x7d] + +v_cmpx_class_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0xfc,0x7d] + +v_cmpx_class_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0xfc,0x7d] + +v_cmpx_class_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0xfc,0x7d] + +v_cmpx_class_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0xfc,0x7d] + +v_cmpx_class_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0xfc,0x7d] + +v_cmpx_class_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0xfc,0x7d] + +v_cmpx_class_f32 null, v2 +// GFX11: encoding: 
[0x7c,0x04,0xfc,0x7d] + +v_cmpx_class_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0xfc,0x7d] + +v_cmpx_class_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0xfc,0x7d] + +v_cmpx_class_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0xfc,0x7d] + +v_cmpx_class_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xfd,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_class_f64 v[1:2], v2 +// GFX11: encoding: [0x01,0x05,0xfe,0x7d] + +v_cmpx_class_f64 v[254:255], v2 +// GFX11: encoding: [0xfe,0x05,0xfe,0x7d] + +v_cmpx_class_f64 s[2:3], v2 +// GFX11: encoding: [0x02,0x04,0xfe,0x7d] + +v_cmpx_class_f64 s[104:105], v2 +// GFX11: encoding: [0x68,0x04,0xfe,0x7d] + +v_cmpx_class_f64 vcc, v2 +// GFX11: encoding: [0x6a,0x04,0xfe,0x7d] + +v_cmpx_class_f64 ttmp[14:15], v2 +// GFX11: encoding: [0x7a,0x04,0xfe,0x7d] + +v_cmpx_class_f64 exec, v2 +// GFX11: encoding: [0x7e,0x04,0xfe,0x7d] + +v_cmpx_class_f64 null, v2 +// GFX11: encoding: [0x7c,0x04,0xfe,0x7d] + +v_cmpx_class_f64 -1, v2 +// GFX11: encoding: [0xc1,0x04,0xfe,0x7d] + +v_cmpx_class_f64 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0xfe,0x7d] + +v_cmpx_class_f64 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0xfe,0x7d] + +v_cmpx_class_f64 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x04,0x7d] + +v_cmpx_eq_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x04,0x7d] + +v_cmpx_eq_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x04,0x7d] + +v_cmpx_eq_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x04,0x7d] + +v_cmpx_eq_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x04,0x7d] + +v_cmpx_eq_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x04,0x7d] + +v_cmpx_eq_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x04,0x7d] + +v_cmpx_eq_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x04,0x7d] + +v_cmpx_eq_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x04,0x7d] + +v_cmpx_eq_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x04,0x7d] + +v_cmpx_eq_f16 null, v2 +// GFX11: encoding: 
[0x7c,0x04,0x04,0x7d] + +v_cmpx_eq_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x04,0x7d] + +v_cmpx_eq_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x04,0x7d] + +v_cmpx_eq_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x04,0x7d] + +v_cmpx_eq_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x04,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x24,0x7d] + +v_cmpx_eq_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x24,0x7d] + +v_cmpx_eq_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x24,0x7d] + +v_cmpx_eq_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x24,0x7d] + +v_cmpx_eq_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x24,0x7d] + +v_cmpx_eq_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x24,0x7d] + +v_cmpx_eq_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x24,0x7d] + +v_cmpx_eq_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x24,0x7d] + +v_cmpx_eq_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x24,0x7d] + +v_cmpx_eq_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x24,0x7d] + +v_cmpx_eq_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x24,0x7d] + +v_cmpx_eq_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x24,0x7d] + +v_cmpx_eq_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x24,0x7d] + +v_cmpx_eq_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x24,0x7d] + +v_cmpx_eq_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x25,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x44,0x7d] + +v_cmpx_eq_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x44,0x7d] + +v_cmpx_eq_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x44,0x7d] + +v_cmpx_eq_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x44,0x7d] + +v_cmpx_eq_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x44,0x7d] + +v_cmpx_eq_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x44,0x7d] + +v_cmpx_eq_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x44,0x7d] + +v_cmpx_eq_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x44,0x7d] + 
+v_cmpx_eq_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x44,0x7d] + +v_cmpx_eq_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x44,0x7d] + +v_cmpx_eq_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x44,0x7d] + +v_cmpx_eq_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x45,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_i16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x64,0x7d] + +v_cmpx_eq_i16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x64,0x7d] + +v_cmpx_eq_i16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x64,0x7d] + +v_cmpx_eq_i16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x64,0x7d] + +v_cmpx_eq_i16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x64,0x7d] + +v_cmpx_eq_i16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x64,0x7d] + +v_cmpx_eq_i16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x64,0x7d] + +v_cmpx_eq_i16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x64,0x7d] + +v_cmpx_eq_i16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x64,0x7d] + +v_cmpx_eq_i16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x64,0x7d] + +v_cmpx_eq_i16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x64,0x7d] + +v_cmpx_eq_i16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x64,0x7d] + +v_cmpx_eq_i16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x64,0x7d] + +v_cmpx_eq_i16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x64,0x7d] + +v_cmpx_eq_i16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x64,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_i32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x84,0x7d] + +v_cmpx_eq_i32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x84,0x7d] + +v_cmpx_eq_i32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x84,0x7d] + +v_cmpx_eq_i32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x84,0x7d] + +v_cmpx_eq_i32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x84,0x7d] + +v_cmpx_eq_i32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x84,0x7d] + +v_cmpx_eq_i32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x84,0x7d] + +v_cmpx_eq_i32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x84,0x7d] + +v_cmpx_eq_i32 exec_lo, v2 +// GFX11: encoding: 
[0x7e,0x04,0x84,0x7d] + +v_cmpx_eq_i32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x84,0x7d] + +v_cmpx_eq_i32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x84,0x7d] + +v_cmpx_eq_i32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x84,0x7d] + +v_cmpx_eq_i32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x84,0x7d] + +v_cmpx_eq_i32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x84,0x7d] + +v_cmpx_eq_i32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x85,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_i64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xa4,0x7d] + +v_cmpx_eq_i64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xa4,0x7d] + +v_cmpx_eq_i64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xa5,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_u16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x74,0x7d] + +v_cmpx_eq_u16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x74,0x7d] + +v_cmpx_eq_u16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x74,0x7d] + +v_cmpx_eq_u16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x74,0x7d] + +v_cmpx_eq_u16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x74,0x7d] + +v_cmpx_eq_u16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x74,0x7d] + +v_cmpx_eq_u16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x74,0x7d] + +v_cmpx_eq_u16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x74,0x7d] + +v_cmpx_eq_u16 exec_lo, v2 +// GFX11: encoding: 
[0x7e,0x04,0x74,0x7d] + +v_cmpx_eq_u16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x74,0x7d] + +v_cmpx_eq_u16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x74,0x7d] + +v_cmpx_eq_u16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x74,0x7d] + +v_cmpx_eq_u16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x74,0x7d] + +v_cmpx_eq_u16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x74,0x7d] + +v_cmpx_eq_u16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x74,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_u32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x94,0x7d] + +v_cmpx_eq_u32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x94,0x7d] + +v_cmpx_eq_u32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x94,0x7d] + +v_cmpx_eq_u32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x94,0x7d] + +v_cmpx_eq_u32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x94,0x7d] + +v_cmpx_eq_u32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x94,0x7d] + +v_cmpx_eq_u32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x94,0x7d] + +v_cmpx_eq_u32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x94,0x7d] + +v_cmpx_eq_u32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x94,0x7d] + +v_cmpx_eq_u32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x94,0x7d] + +v_cmpx_eq_u32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x94,0x7d] + +v_cmpx_eq_u32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x94,0x7d] + +v_cmpx_eq_u32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x94,0x7d] + +v_cmpx_eq_u32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x94,0x7d] + +v_cmpx_eq_u32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x95,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_u64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xb4,0x7d] + +v_cmpx_eq_u64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xb4,0x7d] + +v_cmpx_eq_u64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xb4,0x7d] + 
+v_cmpx_eq_u64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xb5,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_f_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x00,0x7d] + +v_cmpx_f_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x00,0x7d] + +v_cmpx_f_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x00,0x7d] + +v_cmpx_f_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x00,0x7d] + +v_cmpx_f_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x00,0x7d] + +v_cmpx_f_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x00,0x7d] + +v_cmpx_f_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x00,0x7d] + +v_cmpx_f_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x00,0x7d] + +v_cmpx_f_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x00,0x7d] + +v_cmpx_f_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x00,0x7d] + +v_cmpx_f_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x00,0x7d] + +v_cmpx_f_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x00,0x7d] + +v_cmpx_f_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x00,0x7d] + +v_cmpx_f_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x00,0x7d] + +v_cmpx_f_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x00,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_f_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x20,0x7d] + +v_cmpx_f_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x20,0x7d] + +v_cmpx_f_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x20,0x7d] + +v_cmpx_f_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x20,0x7d] + +v_cmpx_f_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x20,0x7d] + +v_cmpx_f_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x20,0x7d] + +v_cmpx_f_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x20,0x7d] + 
+v_cmpx_f_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x20,0x7d] + +v_cmpx_f_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x20,0x7d] + +v_cmpx_f_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x20,0x7d] + +v_cmpx_f_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x20,0x7d] + +v_cmpx_f_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x20,0x7d] + +v_cmpx_f_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x20,0x7d] + +v_cmpx_f_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x20,0x7d] + +v_cmpx_f_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x21,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_f_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x40,0x7d] + +v_cmpx_f_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x40,0x7d] + +v_cmpx_f_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x40,0x7d] + +v_cmpx_f_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x40,0x7d] + +v_cmpx_f_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x40,0x7d] + +v_cmpx_f_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x40,0x7d] + +v_cmpx_f_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x40,0x7d] + +v_cmpx_f_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x40,0x7d] + +v_cmpx_f_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x40,0x7d] + +v_cmpx_f_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x40,0x7d] + +v_cmpx_f_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x40,0x7d] + +v_cmpx_f_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x41,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_f_i32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x80,0x7d] + +v_cmpx_f_i32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x80,0x7d] + +v_cmpx_f_i32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x80,0x7d] + +v_cmpx_f_i32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x80,0x7d] + +v_cmpx_f_i32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x80,0x7d] + +v_cmpx_f_i32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x80,0x7d] + +v_cmpx_f_i32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x80,0x7d] + +v_cmpx_f_i32 m0, v2 +// 
GFX11: encoding: [0x7d,0x04,0x80,0x7d] + +v_cmpx_f_i32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x80,0x7d] + +v_cmpx_f_i32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x80,0x7d] + +v_cmpx_f_i32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x80,0x7d] + +v_cmpx_f_i32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x80,0x7d] + +v_cmpx_f_i32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x80,0x7d] + +v_cmpx_f_i32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x80,0x7d] + +v_cmpx_f_i32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x81,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_f_i64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xa0,0x7d] + +v_cmpx_f_i64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xa0,0x7d] + +v_cmpx_f_i64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xa0,0x7d] + +v_cmpx_f_i64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xa0,0x7d] + +v_cmpx_f_i64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xa0,0x7d] + +v_cmpx_f_i64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xa0,0x7d] + +v_cmpx_f_i64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xa0,0x7d] + +v_cmpx_f_i64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xa0,0x7d] + +v_cmpx_f_i64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xa0,0x7d] + +v_cmpx_f_i64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xa0,0x7d] + +v_cmpx_f_i64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xa0,0x7d] + +v_cmpx_f_i64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xa1,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_f_u32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x90,0x7d] + +v_cmpx_f_u32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x90,0x7d] + +v_cmpx_f_u32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x90,0x7d] + +v_cmpx_f_u32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x90,0x7d] + +v_cmpx_f_u32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x90,0x7d] + +v_cmpx_f_u32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x90,0x7d] + +v_cmpx_f_u32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x90,0x7d] + +v_cmpx_f_u32 m0, v2 +// GFX11: encoding: 
[0x7d,0x04,0x90,0x7d] + +v_cmpx_f_u32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x90,0x7d] + +v_cmpx_f_u32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x90,0x7d] + +v_cmpx_f_u32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x90,0x7d] + +v_cmpx_f_u32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x90,0x7d] + +v_cmpx_f_u32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x90,0x7d] + +v_cmpx_f_u32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x90,0x7d] + +v_cmpx_f_u32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x91,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_f_u64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xb0,0x7d] + +v_cmpx_f_u64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xb0,0x7d] + +v_cmpx_f_u64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xb0,0x7d] + +v_cmpx_f_u64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xb0,0x7d] + +v_cmpx_f_u64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xb0,0x7d] + +v_cmpx_f_u64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xb0,0x7d] + +v_cmpx_f_u64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xb0,0x7d] + +v_cmpx_f_u64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xb0,0x7d] + +v_cmpx_f_u64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xb0,0x7d] + +v_cmpx_f_u64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xb0,0x7d] + +v_cmpx_f_u64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xb0,0x7d] + +v_cmpx_f_u64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xb1,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x0c,0x7d] + +v_cmpx_ge_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0c,0x7d] + +v_cmpx_ge_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0c,0x7d] 
+ +v_cmpx_ge_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x0c,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x2c,0x7d] + +v_cmpx_ge_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x2c,0x7d] + +v_cmpx_ge_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x2d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x4c,0x7d] + +v_cmpx_ge_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x4c,0x7d] + +v_cmpx_ge_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 ttmp[14:15], v[2:3] 
+// GFX11: encoding: [0x7a,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x4d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_i16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x6c,0x7d] + +v_cmpx_ge_i16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x6c,0x7d] + +v_cmpx_ge_i16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x6c,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_i32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x8c,0x7d] + +v_cmpx_ge_i32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x8c,0x7d] + +v_cmpx_ge_i32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x8c,0x7d] + 
+v_cmpx_ge_i32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x8d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_i64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xac,0x7d] + +v_cmpx_ge_i64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xac,0x7d] + +v_cmpx_ge_i64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xac,0x7d] + +v_cmpx_ge_i64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xac,0x7d] + +v_cmpx_ge_i64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xac,0x7d] + +v_cmpx_ge_i64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xac,0x7d] + +v_cmpx_ge_i64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xac,0x7d] + +v_cmpx_ge_i64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xac,0x7d] + +v_cmpx_ge_i64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xac,0x7d] + +v_cmpx_ge_i64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xac,0x7d] + +v_cmpx_ge_i64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xac,0x7d] + +v_cmpx_ge_i64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xad,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_u16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x7c,0x7d] + +v_cmpx_ge_u16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x7c,0x7d] + +v_cmpx_ge_u16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x7c,0x7d] + 
+v_cmpx_ge_u16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x7c,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_u32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x9c,0x7d] + +v_cmpx_ge_u32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x9c,0x7d] + +v_cmpx_ge_u32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x9d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_u64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xbc,0x7d] + +v_cmpx_ge_u64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xbc,0x7d] + +v_cmpx_ge_u64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 s[104:105], v[2:3] +// GFX11: 
encoding: [0x68,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xbd,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x08,0x7d] + +v_cmpx_gt_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x08,0x7d] + +v_cmpx_gt_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x08,0x7d] + +v_cmpx_gt_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x08,0x7d] + +v_cmpx_gt_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x08,0x7d] + +v_cmpx_gt_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x08,0x7d] + +v_cmpx_gt_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x08,0x7d] + +v_cmpx_gt_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x08,0x7d] + +v_cmpx_gt_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x08,0x7d] + +v_cmpx_gt_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x08,0x7d] + +v_cmpx_gt_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x08,0x7d] + +v_cmpx_gt_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x08,0x7d] + +v_cmpx_gt_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x08,0x7d] + +v_cmpx_gt_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x08,0x7d] + +v_cmpx_gt_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x08,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x28,0x7d] + +v_cmpx_gt_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x28,0x7d] + +v_cmpx_gt_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x28,0x7d] + +v_cmpx_gt_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x28,0x7d] + 
+v_cmpx_gt_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x28,0x7d] + +v_cmpx_gt_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x28,0x7d] + +v_cmpx_gt_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x28,0x7d] + +v_cmpx_gt_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x28,0x7d] + +v_cmpx_gt_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x28,0x7d] + +v_cmpx_gt_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x28,0x7d] + +v_cmpx_gt_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x28,0x7d] + +v_cmpx_gt_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x28,0x7d] + +v_cmpx_gt_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x28,0x7d] + +v_cmpx_gt_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x28,0x7d] + +v_cmpx_gt_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x29,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x48,0x7d] + +v_cmpx_gt_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x48,0x7d] + +v_cmpx_gt_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x48,0x7d] + +v_cmpx_gt_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x48,0x7d] + +v_cmpx_gt_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x48,0x7d] + +v_cmpx_gt_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x48,0x7d] + +v_cmpx_gt_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x48,0x7d] + +v_cmpx_gt_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x48,0x7d] + +v_cmpx_gt_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x48,0x7d] + +v_cmpx_gt_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x48,0x7d] + +v_cmpx_gt_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x48,0x7d] + +v_cmpx_gt_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x49,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_i16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x68,0x7d] + +v_cmpx_gt_i16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x68,0x7d] + +v_cmpx_gt_i16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x68,0x7d] + +v_cmpx_gt_i16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x68,0x7d] + 
+v_cmpx_gt_i16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x68,0x7d] + +v_cmpx_gt_i16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x68,0x7d] + +v_cmpx_gt_i16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x68,0x7d] + +v_cmpx_gt_i16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x68,0x7d] + +v_cmpx_gt_i16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x68,0x7d] + +v_cmpx_gt_i16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x68,0x7d] + +v_cmpx_gt_i16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x68,0x7d] + +v_cmpx_gt_i16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x68,0x7d] + +v_cmpx_gt_i16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x68,0x7d] + +v_cmpx_gt_i16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x68,0x7d] + +v_cmpx_gt_i16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x68,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_i32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x88,0x7d] + +v_cmpx_gt_i32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x88,0x7d] + +v_cmpx_gt_i32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x88,0x7d] + +v_cmpx_gt_i32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x88,0x7d] + +v_cmpx_gt_i32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x88,0x7d] + +v_cmpx_gt_i32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x88,0x7d] + +v_cmpx_gt_i32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x88,0x7d] + +v_cmpx_gt_i32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x88,0x7d] + +v_cmpx_gt_i32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x88,0x7d] + +v_cmpx_gt_i32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x88,0x7d] + +v_cmpx_gt_i32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x88,0x7d] + +v_cmpx_gt_i32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x88,0x7d] + +v_cmpx_gt_i32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x88,0x7d] + +v_cmpx_gt_i32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x88,0x7d] + +v_cmpx_gt_i32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x89,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_i64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xa8,0x7d] + +v_cmpx_gt_i64 v[254:255], v[2:3] +// GFX11: encoding: 
[0xfe,0x05,0xa8,0x7d] + +v_cmpx_gt_i64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xa9,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_u16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x78,0x7d] + +v_cmpx_gt_u16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x78,0x7d] + +v_cmpx_gt_u16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x78,0x7d] + +v_cmpx_gt_u16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x78,0x7d] + +v_cmpx_gt_u16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x78,0x7d] + +v_cmpx_gt_u16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x78,0x7d] + +v_cmpx_gt_u16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x78,0x7d] + +v_cmpx_gt_u16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x78,0x7d] + +v_cmpx_gt_u16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x78,0x7d] + +v_cmpx_gt_u16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x78,0x7d] + +v_cmpx_gt_u16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x78,0x7d] + +v_cmpx_gt_u16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x78,0x7d] + +v_cmpx_gt_u16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x78,0x7d] + +v_cmpx_gt_u16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x78,0x7d] + +v_cmpx_gt_u16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x78,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_u32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x98,0x7d] + +v_cmpx_gt_u32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x98,0x7d] + 
+v_cmpx_gt_u32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x98,0x7d] + +v_cmpx_gt_u32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x98,0x7d] + +v_cmpx_gt_u32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x98,0x7d] + +v_cmpx_gt_u32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x98,0x7d] + +v_cmpx_gt_u32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x98,0x7d] + +v_cmpx_gt_u32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x98,0x7d] + +v_cmpx_gt_u32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x98,0x7d] + +v_cmpx_gt_u32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x98,0x7d] + +v_cmpx_gt_u32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x98,0x7d] + +v_cmpx_gt_u32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x98,0x7d] + +v_cmpx_gt_u32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x98,0x7d] + +v_cmpx_gt_u32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x98,0x7d] + +v_cmpx_gt_u32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x99,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_u64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xb8,0x7d] + +v_cmpx_gt_u64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xb8,0x7d] + +v_cmpx_gt_u64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xb9,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x06,0x7d] + +v_cmpx_le_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x06,0x7d] + 
+v_cmpx_le_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x06,0x7d] + +v_cmpx_le_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x06,0x7d] + +v_cmpx_le_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x06,0x7d] + +v_cmpx_le_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x06,0x7d] + +v_cmpx_le_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x06,0x7d] + +v_cmpx_le_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x06,0x7d] + +v_cmpx_le_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x06,0x7d] + +v_cmpx_le_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x06,0x7d] + +v_cmpx_le_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x06,0x7d] + +v_cmpx_le_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x06,0x7d] + +v_cmpx_le_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x06,0x7d] + +v_cmpx_le_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x06,0x7d] + +v_cmpx_le_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x06,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x26,0x7d] + +v_cmpx_le_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x26,0x7d] + +v_cmpx_le_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x26,0x7d] + +v_cmpx_le_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x26,0x7d] + +v_cmpx_le_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x26,0x7d] + +v_cmpx_le_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x26,0x7d] + +v_cmpx_le_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x26,0x7d] + +v_cmpx_le_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x26,0x7d] + +v_cmpx_le_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x26,0x7d] + +v_cmpx_le_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x26,0x7d] + +v_cmpx_le_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x26,0x7d] + +v_cmpx_le_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x26,0x7d] + +v_cmpx_le_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x26,0x7d] + +v_cmpx_le_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x26,0x7d] + +v_cmpx_le_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x27,0x7d,0x56,0x34,0x12,0xaf] + 
+v_cmpx_le_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x46,0x7d] + +v_cmpx_le_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x46,0x7d] + +v_cmpx_le_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x46,0x7d] + +v_cmpx_le_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x46,0x7d] + +v_cmpx_le_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x46,0x7d] + +v_cmpx_le_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x46,0x7d] + +v_cmpx_le_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x46,0x7d] + +v_cmpx_le_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x46,0x7d] + +v_cmpx_le_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x46,0x7d] + +v_cmpx_le_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x46,0x7d] + +v_cmpx_le_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x46,0x7d] + +v_cmpx_le_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x47,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_i16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x66,0x7d] + +v_cmpx_le_i16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x66,0x7d] + +v_cmpx_le_i16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x66,0x7d] + +v_cmpx_le_i16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x66,0x7d] + +v_cmpx_le_i16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x66,0x7d] + +v_cmpx_le_i16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x66,0x7d] + +v_cmpx_le_i16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x66,0x7d] + +v_cmpx_le_i16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x66,0x7d] + +v_cmpx_le_i16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x66,0x7d] + +v_cmpx_le_i16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x66,0x7d] + +v_cmpx_le_i16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x66,0x7d] + +v_cmpx_le_i16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x66,0x7d] + +v_cmpx_le_i16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x66,0x7d] + +v_cmpx_le_i16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x66,0x7d] + +v_cmpx_le_i16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x66,0x7d,0x0b,0xfe,0x00,0x00] + 
+v_cmpx_le_i32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x86,0x7d] + +v_cmpx_le_i32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x86,0x7d] + +v_cmpx_le_i32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x86,0x7d] + +v_cmpx_le_i32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x86,0x7d] + +v_cmpx_le_i32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x86,0x7d] + +v_cmpx_le_i32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x86,0x7d] + +v_cmpx_le_i32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x86,0x7d] + +v_cmpx_le_i32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x86,0x7d] + +v_cmpx_le_i32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x86,0x7d] + +v_cmpx_le_i32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x86,0x7d] + +v_cmpx_le_i32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x86,0x7d] + +v_cmpx_le_i32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x86,0x7d] + +v_cmpx_le_i32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x86,0x7d] + +v_cmpx_le_i32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x86,0x7d] + +v_cmpx_le_i32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x87,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_i64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xa6,0x7d] + +v_cmpx_le_i64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xa6,0x7d] + +v_cmpx_le_i64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xa6,0x7d] + +v_cmpx_le_i64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xa6,0x7d] + +v_cmpx_le_i64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xa6,0x7d] + +v_cmpx_le_i64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xa6,0x7d] + +v_cmpx_le_i64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xa6,0x7d] + +v_cmpx_le_i64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xa6,0x7d] + +v_cmpx_le_i64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xa6,0x7d] + +v_cmpx_le_i64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xa6,0x7d] + +v_cmpx_le_i64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xa6,0x7d] + +v_cmpx_le_i64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xa7,0x7d,0x56,0x34,0x12,0xaf] + 
+v_cmpx_le_u16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x76,0x7d] + +v_cmpx_le_u16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x76,0x7d] + +v_cmpx_le_u16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x76,0x7d] + +v_cmpx_le_u16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x76,0x7d] + +v_cmpx_le_u16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x76,0x7d] + +v_cmpx_le_u16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x76,0x7d] + +v_cmpx_le_u16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x76,0x7d] + +v_cmpx_le_u16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x76,0x7d] + +v_cmpx_le_u16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x76,0x7d] + +v_cmpx_le_u16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x76,0x7d] + +v_cmpx_le_u16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x76,0x7d] + +v_cmpx_le_u16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x76,0x7d] + +v_cmpx_le_u16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x76,0x7d] + +v_cmpx_le_u16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x76,0x7d] + +v_cmpx_le_u16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x76,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_u32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x96,0x7d] + +v_cmpx_le_u32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x96,0x7d] + +v_cmpx_le_u32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x96,0x7d] + +v_cmpx_le_u32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x96,0x7d] + +v_cmpx_le_u32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x96,0x7d] + +v_cmpx_le_u32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x96,0x7d] + +v_cmpx_le_u32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x96,0x7d] + +v_cmpx_le_u32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x96,0x7d] + +v_cmpx_le_u32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x96,0x7d] + +v_cmpx_le_u32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x96,0x7d] + +v_cmpx_le_u32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x96,0x7d] + +v_cmpx_le_u32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x96,0x7d] + +v_cmpx_le_u32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x96,0x7d] + +v_cmpx_le_u32 src_scc, v2 +// 
GFX11: encoding: [0xfd,0x04,0x96,0x7d] + +v_cmpx_le_u32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x97,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_u64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xb6,0x7d] + +v_cmpx_le_u64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xb6,0x7d] + +v_cmpx_le_u64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xb6,0x7d] + +v_cmpx_le_u64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xb6,0x7d] + +v_cmpx_le_u64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xb6,0x7d] + +v_cmpx_le_u64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xb6,0x7d] + +v_cmpx_le_u64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xb6,0x7d] + +v_cmpx_le_u64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xb6,0x7d] + +v_cmpx_le_u64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xb6,0x7d] + +v_cmpx_le_u64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xb6,0x7d] + +v_cmpx_le_u64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xb6,0x7d] + +v_cmpx_le_u64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xb7,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lg_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x7d] + +v_cmpx_lg_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x7d] + +v_cmpx_lg_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 src_scc, v2 +// 
GFX11: encoding: [0xfd,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x0a,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lg_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x2a,0x7d] + +v_cmpx_lg_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x2a,0x7d] + +v_cmpx_lg_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x2b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lg_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x4a,0x7d] + +v_cmpx_lg_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x4a,0x7d] + +v_cmpx_lg_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 src_scc, v[2:3] +// GFX11: 
encoding: [0xfd,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x4b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x02,0x7d] + +v_cmpx_lt_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x02,0x7d] + +v_cmpx_lt_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x02,0x7d] + +v_cmpx_lt_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x02,0x7d] + +v_cmpx_lt_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x02,0x7d] + +v_cmpx_lt_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x02,0x7d] + +v_cmpx_lt_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x02,0x7d] + +v_cmpx_lt_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x02,0x7d] + +v_cmpx_lt_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x02,0x7d] + +v_cmpx_lt_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x02,0x7d] + +v_cmpx_lt_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x02,0x7d] + +v_cmpx_lt_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x02,0x7d] + +v_cmpx_lt_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x02,0x7d] + +v_cmpx_lt_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x02,0x7d] + +v_cmpx_lt_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x22,0x7d] + +v_cmpx_lt_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x22,0x7d] + +v_cmpx_lt_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x22,0x7d] + +v_cmpx_lt_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x22,0x7d] + +v_cmpx_lt_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x22,0x7d] + +v_cmpx_lt_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x22,0x7d] + +v_cmpx_lt_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x22,0x7d] + +v_cmpx_lt_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x22,0x7d] + +v_cmpx_lt_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x22,0x7d] + +v_cmpx_lt_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x22,0x7d] + +v_cmpx_lt_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x22,0x7d] + +v_cmpx_lt_f32 -1, v2 +// GFX11: 
encoding: [0xc1,0x04,0x22,0x7d] + +v_cmpx_lt_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x22,0x7d] + +v_cmpx_lt_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x22,0x7d] + +v_cmpx_lt_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x23,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x42,0x7d] + +v_cmpx_lt_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x42,0x7d] + +v_cmpx_lt_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x42,0x7d] + +v_cmpx_lt_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x42,0x7d] + +v_cmpx_lt_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x42,0x7d] + +v_cmpx_lt_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x42,0x7d] + +v_cmpx_lt_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x42,0x7d] + +v_cmpx_lt_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x42,0x7d] + +v_cmpx_lt_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x42,0x7d] + +v_cmpx_lt_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x42,0x7d] + +v_cmpx_lt_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x42,0x7d] + +v_cmpx_lt_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x43,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_i16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x62,0x7d] + +v_cmpx_lt_i16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x62,0x7d] + +v_cmpx_lt_i16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x62,0x7d] + +v_cmpx_lt_i16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x62,0x7d] + +v_cmpx_lt_i16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x62,0x7d] + +v_cmpx_lt_i16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x62,0x7d] + +v_cmpx_lt_i16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x62,0x7d] + +v_cmpx_lt_i16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x62,0x7d] + +v_cmpx_lt_i16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x62,0x7d] + +v_cmpx_lt_i16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x62,0x7d] + +v_cmpx_lt_i16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x62,0x7d] + +v_cmpx_lt_i16 -1, v2 +// GFX11: 
encoding: [0xc1,0x04,0x62,0x7d] + +v_cmpx_lt_i16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x62,0x7d] + +v_cmpx_lt_i16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x62,0x7d] + +v_cmpx_lt_i16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x62,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_i32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x82,0x7d] + +v_cmpx_lt_i32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x82,0x7d] + +v_cmpx_lt_i32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x82,0x7d] + +v_cmpx_lt_i32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x82,0x7d] + +v_cmpx_lt_i32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x82,0x7d] + +v_cmpx_lt_i32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x82,0x7d] + +v_cmpx_lt_i32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x82,0x7d] + +v_cmpx_lt_i32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x82,0x7d] + +v_cmpx_lt_i32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x82,0x7d] + +v_cmpx_lt_i32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x82,0x7d] + +v_cmpx_lt_i32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x82,0x7d] + +v_cmpx_lt_i32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x82,0x7d] + +v_cmpx_lt_i32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x82,0x7d] + +v_cmpx_lt_i32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x82,0x7d] + +v_cmpx_lt_i32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x83,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_i64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xa2,0x7d] + +v_cmpx_lt_i64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xa2,0x7d] + +v_cmpx_lt_i64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 -1, v[2:3] +// GFX11: encoding: 
[0xc1,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xa3,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_u16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x72,0x7d] + +v_cmpx_lt_u16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x72,0x7d] + +v_cmpx_lt_u16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x72,0x7d] + +v_cmpx_lt_u16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x72,0x7d] + +v_cmpx_lt_u16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x72,0x7d] + +v_cmpx_lt_u16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x72,0x7d] + +v_cmpx_lt_u16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x72,0x7d] + +v_cmpx_lt_u16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x72,0x7d] + +v_cmpx_lt_u16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x72,0x7d] + +v_cmpx_lt_u16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x72,0x7d] + +v_cmpx_lt_u16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x72,0x7d] + +v_cmpx_lt_u16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x72,0x7d] + +v_cmpx_lt_u16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x72,0x7d] + +v_cmpx_lt_u16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x72,0x7d] + +v_cmpx_lt_u16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x72,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_u32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x92,0x7d] + +v_cmpx_lt_u32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x92,0x7d] + +v_cmpx_lt_u32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x92,0x7d] + +v_cmpx_lt_u32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x92,0x7d] + +v_cmpx_lt_u32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x92,0x7d] + +v_cmpx_lt_u32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x92,0x7d] + +v_cmpx_lt_u32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x92,0x7d] + +v_cmpx_lt_u32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x92,0x7d] + +v_cmpx_lt_u32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x92,0x7d] + +v_cmpx_lt_u32 exec_hi, v2 +// 
GFX11: encoding: [0x7f,0x04,0x92,0x7d] + +v_cmpx_lt_u32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x92,0x7d] + +v_cmpx_lt_u32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x92,0x7d] + +v_cmpx_lt_u32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x92,0x7d] + +v_cmpx_lt_u32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x92,0x7d] + +v_cmpx_lt_u32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x93,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_u64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xb2,0x7d] + +v_cmpx_lt_u64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xb2,0x7d] + +v_cmpx_lt_u64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xb3,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_i16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x6a,0x7d] + +v_cmpx_ne_i16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x6a,0x7d] + +v_cmpx_ne_i16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 exec_hi, v2 +// 
GFX11: encoding: [0x7f,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x6a,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ne_i32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x8a,0x7d] + +v_cmpx_ne_i32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x8a,0x7d] + +v_cmpx_ne_i32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x8b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_i64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xaa,0x7d] + +v_cmpx_ne_i64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xaa,0x7d] + +v_cmpx_ne_i64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 exec, v[2:3] +// GFX11: encoding: 
[0x7e,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xab,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_u16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x7a,0x7d] + +v_cmpx_ne_u16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x7a,0x7d] + +v_cmpx_ne_u16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x7a,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ne_u32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x9a,0x7d] + +v_cmpx_ne_u32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x9a,0x7d] + +v_cmpx_ne_u32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 m0, v2 +// 
GFX11: encoding: [0x7d,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x9b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_u64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xba,0x7d] + +v_cmpx_ne_u64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xba,0x7d] + +v_cmpx_ne_u64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xba,0x7d] + +v_cmpx_ne_u64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xba,0x7d] + +v_cmpx_ne_u64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xba,0x7d] + +v_cmpx_ne_u64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xba,0x7d] + +v_cmpx_ne_u64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xba,0x7d] + +v_cmpx_ne_u64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xba,0x7d] + +v_cmpx_ne_u64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xba,0x7d] + +v_cmpx_ne_u64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xba,0x7d] + +v_cmpx_ne_u64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xba,0x7d] + +v_cmpx_ne_u64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xbb,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_neq_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x1a,0x7d] + +v_cmpx_neq_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x1a,0x7d] + +v_cmpx_neq_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 m0, 
v2 +// GFX11: encoding: [0x7d,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x1a,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_neq_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x3a,0x7d] + +v_cmpx_neq_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x3a,0x7d] + +v_cmpx_neq_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x3b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_neq_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x5a,0x7d] + +v_cmpx_neq_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x5a,0x7d] + +v_cmpx_neq_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 vcc, v[2:3] +// 
GFX11: encoding: [0x6a,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x5b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nge_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x12,0x7d] + +v_cmpx_nge_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x12,0x7d] + +v_cmpx_nge_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x12,0x7d] + +v_cmpx_nge_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x12,0x7d] + +v_cmpx_nge_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x12,0x7d] + +v_cmpx_nge_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x12,0x7d] + +v_cmpx_nge_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x12,0x7d] + +v_cmpx_nge_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x12,0x7d] + +v_cmpx_nge_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x12,0x7d] + +v_cmpx_nge_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x12,0x7d] + +v_cmpx_nge_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x12,0x7d] + +v_cmpx_nge_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x12,0x7d] + +v_cmpx_nge_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x12,0x7d] + +v_cmpx_nge_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x12,0x7d] + +v_cmpx_nge_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x12,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_nge_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x32,0x7d] + +v_cmpx_nge_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x32,0x7d] + +v_cmpx_nge_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x32,0x7d] + +v_cmpx_nge_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x32,0x7d] + +v_cmpx_nge_f32 vcc_lo, v2 +// GFX11: encoding: 
[0x6a,0x04,0x32,0x7d] + +v_cmpx_nge_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x32,0x7d] + +v_cmpx_nge_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x32,0x7d] + +v_cmpx_nge_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x32,0x7d] + +v_cmpx_nge_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x32,0x7d] + +v_cmpx_nge_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x32,0x7d] + +v_cmpx_nge_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x32,0x7d] + +v_cmpx_nge_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x32,0x7d] + +v_cmpx_nge_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x32,0x7d] + +v_cmpx_nge_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x32,0x7d] + +v_cmpx_nge_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x33,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nge_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x52,0x7d] + +v_cmpx_nge_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x52,0x7d] + +v_cmpx_nge_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x52,0x7d] + +v_cmpx_nge_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x52,0x7d] + +v_cmpx_nge_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x52,0x7d] + +v_cmpx_nge_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x52,0x7d] + +v_cmpx_nge_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x52,0x7d] + +v_cmpx_nge_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x52,0x7d] + +v_cmpx_nge_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x52,0x7d] + +v_cmpx_nge_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x52,0x7d] + +v_cmpx_nge_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x52,0x7d] + +v_cmpx_nge_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x53,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ngt_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x16,0x7d] + +v_cmpx_ngt_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x16,0x7d] + +v_cmpx_ngt_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 vcc_lo, 
v2 +// GFX11: encoding: [0x6a,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x16,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ngt_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x36,0x7d] + +v_cmpx_ngt_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x36,0x7d] + +v_cmpx_ngt_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x37,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ngt_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x56,0x7d] + +v_cmpx_ngt_f64 v[254:255], v[2:3] +// GFX11: encoding: 
[0xfe,0x05,0x56,0x7d] + +v_cmpx_ngt_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x57,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nle_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x18,0x7d] + +v_cmpx_nle_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x18,0x7d] + +v_cmpx_nle_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x18,0x7d] + +v_cmpx_nle_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x18,0x7d] + +v_cmpx_nle_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x18,0x7d] + +v_cmpx_nle_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x18,0x7d] + +v_cmpx_nle_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x18,0x7d] + +v_cmpx_nle_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x18,0x7d] + +v_cmpx_nle_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x18,0x7d] + +v_cmpx_nle_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x18,0x7d] + +v_cmpx_nle_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x18,0x7d] + +v_cmpx_nle_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x18,0x7d] + +v_cmpx_nle_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x18,0x7d] + +v_cmpx_nle_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x18,0x7d] + +v_cmpx_nle_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x18,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_nle_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x38,0x7d] + +v_cmpx_nle_f32 v255, v2 +// GFX11: 
encoding: [0xff,0x05,0x38,0x7d] + +v_cmpx_nle_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x38,0x7d] + +v_cmpx_nle_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x38,0x7d] + +v_cmpx_nle_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x38,0x7d] + +v_cmpx_nle_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x38,0x7d] + +v_cmpx_nle_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x38,0x7d] + +v_cmpx_nle_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x38,0x7d] + +v_cmpx_nle_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x38,0x7d] + +v_cmpx_nle_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x38,0x7d] + +v_cmpx_nle_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x38,0x7d] + +v_cmpx_nle_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x38,0x7d] + +v_cmpx_nle_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x38,0x7d] + +v_cmpx_nle_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x38,0x7d] + +v_cmpx_nle_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x39,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nle_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x58,0x7d] + +v_cmpx_nle_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x58,0x7d] + +v_cmpx_nle_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x58,0x7d] + +v_cmpx_nle_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x58,0x7d] + +v_cmpx_nle_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x58,0x7d] + +v_cmpx_nle_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x58,0x7d] + +v_cmpx_nle_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x58,0x7d] + +v_cmpx_nle_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x58,0x7d] + +v_cmpx_nle_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x58,0x7d] + +v_cmpx_nle_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x58,0x7d] + +v_cmpx_nle_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x58,0x7d] + +v_cmpx_nle_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x59,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nlg_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x14,0x7d] + 
+v_cmpx_nlg_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x14,0x7d] + +v_cmpx_nlg_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x14,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_nlg_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x34,0x7d] + +v_cmpx_nlg_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x34,0x7d] + +v_cmpx_nlg_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x34,0x7d] + 
+v_cmpx_nlg_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x35,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nlg_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x54,0x7d] + +v_cmpx_nlg_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x54,0x7d] + +v_cmpx_nlg_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x55,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nlt_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x1c,0x7d] + +v_cmpx_nlt_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x1c,0x7d] + +v_cmpx_nlt_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 src_scc, v2 +// GFX11: encoding: 
[0xfd,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x1c,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_nlt_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x3c,0x7d] + +v_cmpx_nlt_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x3c,0x7d] + +v_cmpx_nlt_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x3d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nlt_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x5c,0x7d] + +v_cmpx_nlt_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x5c,0x7d] + +v_cmpx_nlt_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 src_scc, v[2:3] +// 
GFX11: encoding: [0xfd,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x5d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_o_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x0e,0x7d] + +v_cmpx_o_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0e,0x7d] + +v_cmpx_o_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x0e,0x7d] + +v_cmpx_o_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x0e,0x7d] + +v_cmpx_o_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0e,0x7d] + +v_cmpx_o_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0e,0x7d] + +v_cmpx_o_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0e,0x7d] + +v_cmpx_o_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0e,0x7d] + +v_cmpx_o_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0e,0x7d] + +v_cmpx_o_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0e,0x7d] + +v_cmpx_o_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x0e,0x7d] + +v_cmpx_o_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0e,0x7d] + +v_cmpx_o_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0e,0x7d] + +v_cmpx_o_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0e,0x7d] + +v_cmpx_o_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x0e,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_o_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x2e,0x7d] + +v_cmpx_o_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x2e,0x7d] + +v_cmpx_o_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x2e,0x7d] + +v_cmpx_o_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x2e,0x7d] + +v_cmpx_o_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x2e,0x7d] + +v_cmpx_o_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x2e,0x7d] + +v_cmpx_o_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x2e,0x7d] + +v_cmpx_o_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x2e,0x7d] + +v_cmpx_o_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x2e,0x7d] + +v_cmpx_o_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x2e,0x7d] + +v_cmpx_o_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x2e,0x7d] + +v_cmpx_o_f32 -1, v2 +// GFX11: encoding: 
[0xc1,0x04,0x2e,0x7d] + +v_cmpx_o_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x2e,0x7d] + +v_cmpx_o_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x2e,0x7d] + +v_cmpx_o_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x2f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_o_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x4e,0x7d] + +v_cmpx_o_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x4e,0x7d] + +v_cmpx_o_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x4e,0x7d] + +v_cmpx_o_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x4e,0x7d] + +v_cmpx_o_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x4e,0x7d] + +v_cmpx_o_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x4e,0x7d] + +v_cmpx_o_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x4e,0x7d] + +v_cmpx_o_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x4e,0x7d] + +v_cmpx_o_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x4e,0x7d] + +v_cmpx_o_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x4e,0x7d] + +v_cmpx_o_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x4e,0x7d] + +v_cmpx_o_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x4f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_t_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x1e,0x7d] + +v_cmpx_t_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x1e,0x7d] + +v_cmpx_t_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x1e,0x7d] + +v_cmpx_t_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x1e,0x7d] + +v_cmpx_t_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x1e,0x7d] + +v_cmpx_t_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x1e,0x7d] + +v_cmpx_t_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x1e,0x7d] + +v_cmpx_t_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x1e,0x7d] + +v_cmpx_t_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x1e,0x7d] + +v_cmpx_t_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x1e,0x7d] + +v_cmpx_t_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x1e,0x7d] + +v_cmpx_t_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x1e,0x7d] + 
+v_cmpx_t_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x1e,0x7d] + +v_cmpx_t_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x1e,0x7d] + +v_cmpx_t_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x1e,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_t_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x3e,0x7d] + +v_cmpx_t_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x3e,0x7d] + +v_cmpx_t_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x3e,0x7d] + +v_cmpx_t_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x3e,0x7d] + +v_cmpx_t_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x3e,0x7d] + +v_cmpx_t_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x3e,0x7d] + +v_cmpx_t_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x3e,0x7d] + +v_cmpx_t_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x3e,0x7d] + +v_cmpx_t_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x3e,0x7d] + +v_cmpx_t_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x3e,0x7d] + +v_cmpx_t_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x3e,0x7d] + +v_cmpx_t_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x3e,0x7d] + +v_cmpx_t_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x3e,0x7d] + +v_cmpx_t_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x3e,0x7d] + +v_cmpx_t_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x3f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_t_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x5e,0x7d] + +v_cmpx_t_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x5e,0x7d] + +v_cmpx_t_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x5e,0x7d] + +v_cmpx_t_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x5e,0x7d] + +v_cmpx_t_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x5e,0x7d] + +v_cmpx_t_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x5e,0x7d] + +v_cmpx_t_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x5e,0x7d] + +v_cmpx_t_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x5e,0x7d] + +v_cmpx_t_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x5e,0x7d] + +v_cmpx_t_f64 0.5, v[2:3] +// GFX11: encoding: 
[0xf0,0x04,0x5e,0x7d] + +v_cmpx_t_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x5e,0x7d] + +v_cmpx_t_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x5f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_t_i32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x8e,0x7d] + +v_cmpx_t_i32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x8e,0x7d] + +v_cmpx_t_i32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x8e,0x7d] + +v_cmpx_t_i32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x8e,0x7d] + +v_cmpx_t_i32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x8e,0x7d] + +v_cmpx_t_i32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x8e,0x7d] + +v_cmpx_t_i32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x8e,0x7d] + +v_cmpx_t_i32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x8e,0x7d] + +v_cmpx_t_i32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x8e,0x7d] + +v_cmpx_t_i32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x8e,0x7d] + +v_cmpx_t_i32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x8e,0x7d] + +v_cmpx_t_i32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x8e,0x7d] + +v_cmpx_t_i32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x8e,0x7d] + +v_cmpx_t_i32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x8e,0x7d] + +v_cmpx_t_i32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x8f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_t_i64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xae,0x7d] + +v_cmpx_t_i64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xae,0x7d] + +v_cmpx_t_i64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xae,0x7d] + +v_cmpx_t_i64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xae,0x7d] + +v_cmpx_t_i64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xae,0x7d] + +v_cmpx_t_i64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xae,0x7d] + +v_cmpx_t_i64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xae,0x7d] + +v_cmpx_t_i64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xae,0x7d] + +v_cmpx_t_i64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xae,0x7d] + +v_cmpx_t_i64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xae,0x7d] + 
+v_cmpx_t_i64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0xae,0x7d] + +v_cmpx_t_i64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xaf,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_t_u32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x9e,0x7d] + +v_cmpx_t_u32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x9e,0x7d] + +v_cmpx_t_u32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x9e,0x7d] + +v_cmpx_t_u32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x9e,0x7d] + +v_cmpx_t_u32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x9e,0x7d] + +v_cmpx_t_u32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x9e,0x7d] + +v_cmpx_t_u32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x9e,0x7d] + +v_cmpx_t_u32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x9e,0x7d] + +v_cmpx_t_u32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x9e,0x7d] + +v_cmpx_t_u32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x9e,0x7d] + +v_cmpx_t_u32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x9e,0x7d] + +v_cmpx_t_u32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x9e,0x7d] + +v_cmpx_t_u32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x9e,0x7d] + +v_cmpx_t_u32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x9e,0x7d] + +v_cmpx_t_u32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x9f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_t_u64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0xbe,0x7d] + +v_cmpx_t_u64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0xbe,0x7d] + +v_cmpx_t_u64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0xbe,0x7d] + +v_cmpx_t_u64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0xbe,0x7d] + +v_cmpx_t_u64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0xbe,0x7d] + +v_cmpx_t_u64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0xbe,0x7d] + +v_cmpx_t_u64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0xbe,0x7d] + +v_cmpx_t_u64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0xbe,0x7d] + +v_cmpx_t_u64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0xbe,0x7d] + +v_cmpx_t_u64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0xbe,0x7d] + +v_cmpx_t_u64 src_scc, v[2:3] 
+// GFX11: encoding: [0xfd,0x04,0xbe,0x7d] + +v_cmpx_t_u64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0xbf,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_tru_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x1e,0x7d] + +v_cmpx_tru_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x1e,0x7d] + +v_cmpx_tru_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 null, v2 +// GFX11: encoding: [0x7c,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x1e,0x7d] + +v_cmpx_tru_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x1e,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_tru_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x3e,0x7d] + +v_cmpx_tru_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x3e,0x7d] + +v_cmpx_tru_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 null, v2 +// GFX11: encoding: 
[0x7c,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x3e,0x7d] + +v_cmpx_tru_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x3f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_tru_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x5e,0x7d] + +v_cmpx_tru_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x5e,0x7d] + +v_cmpx_tru_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 -1, v[2:3] +// GFX11: encoding: [0xc1,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x5e,0x7d] + +v_cmpx_tru_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x5f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_u_f16 v1, v2 +// GFX11: encoding: [0x01,0x05,0x10,0x7d] + +v_cmpx_u_f16 v127, v2 +// GFX11: encoding: [0x7f,0x05,0x10,0x7d] + +v_cmpx_u_f16 s1, v2 +// GFX11: encoding: [0x01,0x04,0x10,0x7d] + +v_cmpx_u_f16 s105, v2 +// GFX11: encoding: [0x69,0x04,0x10,0x7d] + +v_cmpx_u_f16 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x10,0x7d] + +v_cmpx_u_f16 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x10,0x7d] + +v_cmpx_u_f16 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x10,0x7d] + +v_cmpx_u_f16 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x10,0x7d] + +v_cmpx_u_f16 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x10,0x7d] + +v_cmpx_u_f16 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x10,0x7d] + +v_cmpx_u_f16 null, v2 +// GFX11: 
encoding: [0x7c,0x04,0x10,0x7d] + +v_cmpx_u_f16 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x10,0x7d] + +v_cmpx_u_f16 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x10,0x7d] + +v_cmpx_u_f16 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x10,0x7d] + +v_cmpx_u_f16 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0x10,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_u_f32 v1, v2 +// GFX11: encoding: [0x01,0x05,0x30,0x7d] + +v_cmpx_u_f32 v255, v2 +// GFX11: encoding: [0xff,0x05,0x30,0x7d] + +v_cmpx_u_f32 s1, v2 +// GFX11: encoding: [0x01,0x04,0x30,0x7d] + +v_cmpx_u_f32 s105, v2 +// GFX11: encoding: [0x69,0x04,0x30,0x7d] + +v_cmpx_u_f32 vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x30,0x7d] + +v_cmpx_u_f32 vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x30,0x7d] + +v_cmpx_u_f32 ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x30,0x7d] + +v_cmpx_u_f32 m0, v2 +// GFX11: encoding: [0x7d,0x04,0x30,0x7d] + +v_cmpx_u_f32 exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x30,0x7d] + +v_cmpx_u_f32 exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x30,0x7d] + +v_cmpx_u_f32 null, v2 +// GFX11: encoding: [0x7c,0x04,0x30,0x7d] + +v_cmpx_u_f32 -1, v2 +// GFX11: encoding: [0xc1,0x04,0x30,0x7d] + +v_cmpx_u_f32 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x30,0x7d] + +v_cmpx_u_f32 src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x30,0x7d] + +v_cmpx_u_f32 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0x31,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_u_f64 v[1:2], v[2:3] +// GFX11: encoding: [0x01,0x05,0x50,0x7d] + +v_cmpx_u_f64 v[254:255], v[2:3] +// GFX11: encoding: [0xfe,0x05,0x50,0x7d] + +v_cmpx_u_f64 s[2:3], v[2:3] +// GFX11: encoding: [0x02,0x04,0x50,0x7d] + +v_cmpx_u_f64 s[104:105], v[2:3] +// GFX11: encoding: [0x68,0x04,0x50,0x7d] + +v_cmpx_u_f64 vcc, v[2:3] +// GFX11: encoding: [0x6a,0x04,0x50,0x7d] + +v_cmpx_u_f64 ttmp[14:15], v[2:3] +// GFX11: encoding: [0x7a,0x04,0x50,0x7d] + +v_cmpx_u_f64 exec, v[2:3] +// GFX11: encoding: [0x7e,0x04,0x50,0x7d] + +v_cmpx_u_f64 null, v[2:3] +// GFX11: encoding: [0x7c,0x04,0x50,0x7d] + +v_cmpx_u_f64 -1, 
v[2:3] +// GFX11: encoding: [0xc1,0x04,0x50,0x7d] + +v_cmpx_u_f64 0.5, v[2:3] +// GFX11: encoding: [0xf0,0x04,0x50,0x7d] + +v_cmpx_u_f64 src_scc, v[2:3] +// GFX11: encoding: [0xfd,0x04,0x50,0x7d] + +v_cmpx_u_f64 0xaf123456, v[254:255] +// GFX11: encoding: [0xff,0xfc,0x51,0x7d,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s index e90bb80f098cb0f..17e60c08f6a77ea 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s v_cmpx_class_f16_e32 v1, v2 // GFX11: encoding: [0x01,0x05,0xfa,0x7d] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16-fake16.s new file mode 100644 index 000000000000000..897eefe1fcbe22d --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16-fake16.s @@ -0,0 +1,2690 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s + +v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_class_f16 v1, v2 row_mirror +// GFX11: encoding: 
[0xfa,0x04,0xfa,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_class_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_class_f16 -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfa,0x7d,0x7f,0x6f,0x35,0x30] + +v_cmpx_class_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_class_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_shr:1 +// GFX11: encoding: 
[0xfa,0x04,0xfc,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_class_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_class_f32 -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfd,0x7d,0xff,0x6f,0x35,0x30] + +v_cmpx_eq_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: 
[0xfa,0x04,0x04,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x04,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_eq_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: 
[0xfa,0xfe,0x25,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_eq_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_i16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x64,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_eq_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_i32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_half_mirror +// GFX11: encoding: 
[0xfa,0x04,0x84,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x85,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_eq_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_u16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_u16 v1, 
v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x74,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_eq_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_u32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_u32 v1, v2 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x95,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_f_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_f_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_f_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_f_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_f_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_f_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x00,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_f_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_f_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: 
[0xfa,0x04,0x20,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_f_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_f_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_f_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_f_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x21,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_f_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_f_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_f_i32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_f_i32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_f_i32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_f_i32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_f_i32 v1, v2 
row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_f_i32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_f_i32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_f_i32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_f_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_f_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_f_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_f_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x81,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_f_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_f_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_f_u32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: 
[0xfa,0x04,0x90,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_f_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_f_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_f_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x91,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: 
[0xfa,0xfe,0x0c,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_ge_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x2d,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_ge_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_i16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_half_mirror +// GFX11: encoding: 
[0xfa,0x04,0x6c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x6c,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_ge_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_i32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_i32 v1, 
v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x8d,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_u16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_u16 v1, v2 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x7c,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_ge_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_u32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x9d,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: 
[0xfa,0x04,0x08,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x08,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_gt_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x0f,0x01,0xff] + 
+v_cmpx_gt_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x29,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_gt_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_i16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x68,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_gt_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_i32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: 
encoding: [0xfa,0xfe,0x89,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_u16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x78,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_gt_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_u32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_half_mirror +// GFX11: encoding: 
[0xfa,0x04,0x98,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x99,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_le_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_f16 v1, 
v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x06,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_le_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_f32 v1, 
v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x27,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_le_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_i16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x66,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_le_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: 
[0xfa,0x04,0x86,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_i32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x87,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_le_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_u16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x0f,0x01,0xff] + 
+v_cmpx_le_u16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x76,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_le_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_u32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf 
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x97,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_lg_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lg_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lg_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lg_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: 
[0xfa,0xfe,0x0a,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_lg_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lg_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x2b,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_half_mirror +// GFX11: encoding: 
[0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1f,0x01,0xff] + 
+v_cmpx_lt_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x23,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_lt_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_i16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x5f,0x01,0x01] + 
+v_cmpx_lt_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x62,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_lt_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_i32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x83,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: 
encoding: [0xfa,0x04,0x72,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_u16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x72,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_lt_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_u32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x0f,0x01,0xff] + 
+v_cmpx_lt_u32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x93,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_ne_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_i16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf 
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ne_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x6a,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_ne_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_i32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ne_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: 
[0xfa,0xfe,0x8b,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_ne_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_u16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ne_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x7a,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_ne_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_u32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_half_mirror +// GFX11: encoding: 
[0xfa,0x04,0x9a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ne_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x9b,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_neq_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_neq_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x1f,0x01,0xff] + 
+v_cmpx_neq_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_neq_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_neq_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x1a,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_neq_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_neq_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: 
[0xfa,0x04,0x3a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_neq_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_neq_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x3b,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_nge_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nge_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nge_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nge_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x12,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_nge_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: 
[0xfa,0x04,0x32,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nge_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nge_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nge_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x33,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_ngt_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ngt_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_shl:1 +// GFX11: encoding: 
[0xfa,0x04,0x16,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ngt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ngt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x16,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_ngt_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ngt_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_ror:1 +// GFX11: encoding: 
[0xfa,0x04,0x36,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ngt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ngt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x37,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_nle_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nle_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nle_f16 v1, v2 row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nle_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x18,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_nle_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nle_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nle_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nle_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x39,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_nlg_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: 
encoding: [0xfa,0x04,0x14,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nlg_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nlg_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nlg_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x14,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_nlg_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nlg_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_shl:15 +// GFX11: encoding: 
[0xfa,0x04,0x34,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nlg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nlg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x35,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_nlt_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nlt_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_ror:15 +// GFX11: encoding: 
[0xfa,0x04,0x1c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nlt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nlt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x1c,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_nlt_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nlt_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nlt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: 
[0xfa,0x04,0x3c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nlt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x3d,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_o_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_o_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_o_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_o_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x0e,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_o_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_o_f32 v1, v2 row_mirror +// GFX11: 
encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_o_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_o_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x2f,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_t_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_t_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_t_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_t_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_t_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_t_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_t_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_t_f16 
v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_t_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_t_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_t_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_t_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_t_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_t_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x1e,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_t_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_t_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_t_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_t_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_t_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_t_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x3f,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_t_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_t_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_t_i32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_t_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_t_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_t_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x8f,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_t_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x1b,0x00,0xff] + 
+v_cmpx_t_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_t_u32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_t_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_t_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_t_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x9f,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_tru_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_tru_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_tru_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_shl:15 +// GFX11: encoding: 
[0xfa,0x04,0x1e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_tru_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_tru_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_tru_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x1e,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_tru_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_tru_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_tru_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_ror:15 +// GFX11: encoding: 
[0xfa,0x04,0x3e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_tru_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_tru_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_tru_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x3f,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_u_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_u_f16 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_u_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_u_f16 
-|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x10,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_u_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_u_f32 v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_u_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_u_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0x31,0x7d,0xff,0x6f,0xf5,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s index d8fc1d3e2b3cd9b..e46661df84a1554 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 
-mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0] // GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8-fake16.s new file mode 100644 index 000000000000000..e66da32fe0329a8 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8-fake16.s @@ -0,0 +1,578 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s + +v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfa,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfd,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_eq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x04,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16 v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x04,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x04,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_eq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x24,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x24,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x25,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_eq_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x64,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x64,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x64,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_eq_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x84,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x84,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x85,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_eq_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x74,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x74,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x74,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_eq_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x94,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x94,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x95,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_f_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: 
[0xe9,0x04,0x00,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_f_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x00,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_f_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x00,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_f_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x20,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_f_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x20,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_f_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x21,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_f_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x80,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_f_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x80,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_f_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x81,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_f_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x90,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_f_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x90,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_f_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x91,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x0c,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x2c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x2c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x2d,0x7d,0xff,0x00,0x00,0x00] + 
+v_cmpx_ge_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x6c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x6c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x6c,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ge_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x8c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x8c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x8d,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ge_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x7c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x7c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x7c,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ge_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x9c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x9c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x9d,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_gt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x08,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x08,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x08,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_gt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x28,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x28,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 
+// GFX11: encoding: [0xe9,0xfe,0x29,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_gt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x68,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x68,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x68,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_gt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x88,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x88,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x89,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_gt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x78,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x78,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x78,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_gt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x98,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x98,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x99,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_le_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x06,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x06,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x06,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_le_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x26,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: 
[0xea,0x04,0x26,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x27,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_le_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x66,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x66,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x66,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_le_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x86,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x86,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x87,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_le_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x76,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x76,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x76,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_le_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x96,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x96,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x97,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_lg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x0a,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x2a,0x7d,0x01,0x77,0x39,0x05] + 
+v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x2a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x2b,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x22,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x23,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_lt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x62,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x62,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x62,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x82,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x82,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x83,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_lt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x72,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x72,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x72,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX11: encoding: [0xe9,0x04,0x92,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x92,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x93,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ne_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x6a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x6a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x6a,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ne_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x8a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x8a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x8b,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ne_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x7a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x7a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x7a,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ne_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x9a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x9a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x9b,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_neq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x1a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x1a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: 
[0xe9,0xfe,0x1a,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_neq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x3a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x3a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x3b,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_nge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x12,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x12,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x12,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_nge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x32,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x32,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x33,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ngt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x16,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x16,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x16,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ngt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x36,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x36,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x37,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_nle_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x18,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: 
[0xea,0x04,0x18,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x18,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_nle_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x38,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x38,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x39,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_nlg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x14,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x14,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x14,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_nlg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x34,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x34,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x35,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_nlt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x1c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x1c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x1c,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_nlt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x3c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x3c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x3d,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_o_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: 
[0xe9,0x04,0x0e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x0e,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_o_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x2e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x2e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x2f,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_t_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x1e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_t_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x1e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_t_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x1e,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_t_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x3e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_t_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x3e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_t_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x3f,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_t_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x8e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_t_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x8e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_t_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x8f,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_t_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x9e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_t_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x9e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_t_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x9f,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_tru_f16 
v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x1e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_tru_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x1e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_tru_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x1e,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_tru_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x3e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_tru_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x3e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_tru_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x3f,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_u_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x10,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x10,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x10,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_u_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x30,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x30,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0x31,0x7d,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s index 9db7e48809ee154..5062f901d2aa3a0 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck 
--check-prefixes=GFX11 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s index c37d15b7abc79bd..ec628dd94f366ba 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s @@ -1,542 +1,542 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s v_cmpx_class_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmpx_eq_u16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_class_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_f_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_i16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v1, v255 -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for 
this GPU or mode -v_cmpx_ngt_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_f_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_f_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_o_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_f_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_t_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_f_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_tru_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_f_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_u_f16_e32 v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_f_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_class_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: 
operands are not valid for this GPU or mode -v_cmpx_eq_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_u16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_f_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+v_cmpx_ge_i16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_i16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v255, v2 -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ngt_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_i16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_o_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid 
operand for instruction -v_cmpx_t_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_i16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_tru_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_u_f16_e32 v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_f_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction 
+v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for 
instruction -v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid 
operand for instruction +v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_t_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_tru_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are 
not valid for this GPU or mode -v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_f_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction v_cmpx_lt_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:28: error: invalid operand 
for instruction -v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: 
invalid operand for instruction -v_cmpx_t_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_f_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction 
+v_cmpx_ngt_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: 
error: invalid operand for instruction -v_cmpx_ngt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_t_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_class_f16_e32 v255, v2 
dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_f_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction +v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_t_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_t_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_t_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction -v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_t_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:18: error: invalid operand for 
instruction -v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_t_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:18: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_t_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:18: error: invalid operand for instruction -v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction -v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_tru_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction -v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_u_f16_e32 v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_t_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_tru_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_u_f16_e32 v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmpx_u_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction +v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s index a279bd381848e82..3bbdf3d3a903f87 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s @@ -1,542 +1,542 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s v_cmpx_class_f16 v1, v255 -// GFX11: v_cmpx_class_f16_e64 +// GFX11: v_cmpx_class_f16_e64 v1, v255 ; encoding: 
[0x7e,0x00,0xfd,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_f16 v1, v255 -// GFX11: v_cmpx_eq_f16_e64 +v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_i16 v1, v255 -// GFX11: v_cmpx_eq_i16_e64 +v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_eq_u16 v1, v255 -// GFX11: v_cmpx_eq_u16_e64 +v_cmpx_class_f16 v255, v2 +// GFX11: v_cmpx_class_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_f_f16 v1, v255 -// GFX11: v_cmpx_f_f16_e64 +v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_class_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_f16 v1, v255 -// GFX11: v_cmpx_ge_f16_e64 +v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_class_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_i16 v1, v255 -// GFX11: v_cmpx_ge_i16_e64 +v_cmpx_eq_f16 v1, v255 +// GFX11: v_cmpx_eq_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ge_u16 v1, v255 -// GFX11: v_cmpx_ge_u16_e64 +v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_f16 v1, v255 -// GFX11: v_cmpx_gt_f16_e64 +v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_i16 v1, v255 -// GFX11: v_cmpx_gt_i16_e64 +v_cmpx_eq_f16 v255, v2 +// GFX11: 
v_cmpx_eq_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x82,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_gt_u16 v1, v255 -// GFX11: v_cmpx_gt_u16_e64 +v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_f16 v1, v255 -// GFX11: v_cmpx_le_f16_e64 +v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_i16 v1, v255 -// GFX11: v_cmpx_le_i16_e64 +v_cmpx_eq_i16 v1, v255 +// GFX11: v_cmpx_eq_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_le_u16 v1, v255 -// GFX11: v_cmpx_le_u16_e64 +v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lg_f16 v1, v255 -// GFX11: v_cmpx_lg_f16_e64 +v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v255 -// GFX11: v_cmpx_lt_f16_e64 +v_cmpx_eq_i16 v255, v2 +// GFX11: v_cmpx_eq_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_i16 v1, v255 -// GFX11: v_cmpx_lt_i16_e64 +v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lt_u16 v1, v255 -// GFX11: v_cmpx_lt_u16_e64 +v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_i16 v1, v255 -// GFX11: v_cmpx_ne_i16_e64 +v_cmpx_eq_u16 v1, v255 +// 
GFX11: v_cmpx_eq_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ne_u16 v1, v255 -// GFX11: v_cmpx_ne_u16_e64 +v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_neq_f16 v1, v255 -// GFX11: v_cmpx_neq_f16_e64 +v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nge_f16 v1, v255 -// GFX11: v_cmpx_nge_f16_e64 +v_cmpx_eq_u16 v255, v2 +// GFX11: v_cmpx_eq_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ngt_f16 v1, v255 -// GFX11: v_cmpx_ngt_f16_e64 +v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nle_f16 v1, v255 -// GFX11: v_cmpx_nle_f16_e64 +v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_eq_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v1, v255 -// GFX11: v_cmpx_nlg_f16_e64 +v_cmpx_f_f16 v1, v255 +// GFX11: v_cmpx_f_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_nlt_f16 v1, v255 -// GFX11: v_cmpx_nlt_f16_e64 +v_cmpx_f_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_f_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x80,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_o_f16 v1, v255 -// GFX11: v_cmpx_o_f16_e64 +v_cmpx_f_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_f_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_t_f16 v1, v255 -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_f_f16 v255, 
v2 +// GFX11: v_cmpx_f_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x80,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_tru_f16 v1, v255 -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_f_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_f_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x80,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_u_f16 v1, v255 -// GFX11: v_cmpx_u_f16_e64 +v_cmpx_f_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_f_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_class_f16 v255, v2 -// GFX11: v_cmpx_class_f16_e64 +v_cmpx_ge_f16 v1, v255 +// GFX11: v_cmpx_ge_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_f16 v255, v2 -// GFX11: v_cmpx_eq_f16_e64 +v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_i16 v255, v2 -// GFX11: v_cmpx_eq_i16_e64 +v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_eq_u16 v255, v2 -// GFX11: v_cmpx_eq_u16_e64 +v_cmpx_ge_f16 v255, v2 +// GFX11: v_cmpx_ge_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x86,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_f_f16 v255, v2 -// GFX11: v_cmpx_f_f16_e64 +v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_f16 v255, v2 -// GFX11: v_cmpx_ge_f16_e64 +v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_i16 v255, v2 -// GFX11: v_cmpx_ge_i16_e64 +v_cmpx_ge_i16 v1, 
v255 +// GFX11: v_cmpx_ge_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ge_u16 v255, v2 -// GFX11: v_cmpx_ge_u16_e64 +v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_f16 v255, v2 -// GFX11: v_cmpx_gt_f16_e64 +v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_i16 v255, v2 -// GFX11: v_cmpx_gt_i16_e64 +v_cmpx_ge_i16 v255, v2 +// GFX11: v_cmpx_ge_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_gt_u16 v255, v2 -// GFX11: v_cmpx_gt_u16_e64 +v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_f16 v255, v2 -// GFX11: v_cmpx_le_f16_e64 +v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_i16 v255, v2 -// GFX11: v_cmpx_le_i16_e64 +v_cmpx_ge_u16 v1, v255 +// GFX11: v_cmpx_ge_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_le_u16 v255, v2 -// GFX11: v_cmpx_le_u16_e64 +v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lg_f16 v255, v2 -// GFX11: v_cmpx_lg_f16_e64 +v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v255, v2 -// GFX11: v_cmpx_lt_f16_e64 +v_cmpx_ge_u16 
v255, v2 +// GFX11: v_cmpx_ge_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_i16 v255, v2 -// GFX11: v_cmpx_lt_i16_e64 +v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lt_u16 v255, v2 -// GFX11: v_cmpx_lt_u16_e64 +v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ge_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_i16 v255, v2 -// GFX11: v_cmpx_ne_i16_e64 +v_cmpx_gt_f16 v1, v255 +// GFX11: v_cmpx_gt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ne_u16 v255, v2 -// GFX11: v_cmpx_ne_u16_e64 +v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_neq_f16 v255, v2 -// GFX11: v_cmpx_neq_f16_e64 +v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nge_f16 v255, v2 -// GFX11: v_cmpx_nge_f16_e64 +v_cmpx_gt_f16 v255, v2 +// GFX11: v_cmpx_gt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x84,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ngt_f16 v255, v2 -// GFX11: v_cmpx_ngt_f16_e64 +v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nle_f16 v255, v2 -// GFX11: v_cmpx_nle_f16_e64 +v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v255, v2 -// GFX11: v_cmpx_nlg_f16_e64 
+v_cmpx_gt_i16 v1, v255 +// GFX11: v_cmpx_gt_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_nlt_f16 v255, v2 -// GFX11: v_cmpx_nlt_f16_e64 +v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_o_f16 v255, v2 -// GFX11: v_cmpx_o_f16_e64 +v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_t_f16 v255, v2 -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_gt_i16 v255, v2 +// GFX11: v_cmpx_gt_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_tru_f16 v255, v2 -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_u_f16 v255, v2 -// GFX11: v_cmpx_u_f16_e64 +v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_class_f16_e64 +v_cmpx_gt_u16 v1, v255 +// GFX11: v_cmpx_gt_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_eq_f16_e64 +v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_eq_i16_e64 +v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0xbc,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_eq_u16_e64 +v_cmpx_gt_u16 v255, v2 +// GFX11: v_cmpx_gt_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_f_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_f_f16_e64 +v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ge_f16_e64 +v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_gt_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ge_i16_e64 +v_cmpx_le_f16 v1, v255 +// GFX11: v_cmpx_le_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ge_u16_e64 +v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_gt_f16_e64 +v_cmpx_le_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_gt_i16_e64 +v_cmpx_le_f16 v255, v2 +// GFX11: v_cmpx_le_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x83,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_gt_u16_e64 +v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_f16 v1, v255 
quad_perm:[3,2,1,0] -// GFX11: v_cmpx_le_f16_e64 +v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_le_i16_e64 +v_cmpx_le_i16 v1, v255 +// GFX11: v_cmpx_le_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_le_u16_e64 +v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lg_f16_e64 +v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64 +v_cmpx_le_i16 v255, v2 +// GFX11: v_cmpx_le_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_i16_e64 +v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_u16_e64 +v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ne_i16_e64 +v_cmpx_le_u16 v1, v255 +// GFX11: v_cmpx_le_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ne_u16_e64 +v_cmpx_le_u16 v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_neq_f16_e64 +v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_nge_f16_e64 +v_cmpx_le_u16 v255, v2 +// GFX11: v_cmpx_le_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ngt_f16_e64 +v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_le_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_nle_f16_e64 +v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_le_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_nlg_f16_e64 +v_cmpx_lg_f16 v1, v255 +// GFX11: v_cmpx_lg_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_nlt_f16_e64 +v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_o_f16_e64 +v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_t_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_lg_f16 v255, 
v2 +// GFX11: v_cmpx_lg_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x85,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_tru_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_u_f16_e64 +v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_class_f16_e64 +v_cmpx_lt_f16 v1, v255 +// GFX11: v_cmpx_lt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_eq_f16_e64 +v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_eq_i16_e64 +v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_eq_u16_e64 +v_cmpx_lt_f16 v255, v2 +// GFX11: v_cmpx_lt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_f_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_f_f16_e64 +v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ge_f16_e64 +v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ge_i16_e64 +v_cmpx_lt_i16 v1, v255 +// GFX11: v_cmpx_lt_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ge_u16_e64 +v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_gt_f16_e64 +v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_gt_i16_e64 +v_cmpx_lt_i16 v255, v2 +// GFX11: v_cmpx_lt_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_gt_u16_e64 +v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_le_f16_e64 +v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_le_i16_e64 +v_cmpx_lt_u16 v1, v255 +// GFX11: v_cmpx_lt_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_le_u16_e64 +v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x00,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lg_f16_e64 +v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64 +v_cmpx_lt_u16 v255, v2 +// GFX11: v_cmpx_lt_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_i16_e64 +v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_lt_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] v_cmpx_lt_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_lt_u16_e64 +// GFX11: v_cmpx_lt_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ne_i16_e64 - -v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ne_u16_e64 +v_cmpx_ne_i16 v1, v255 +// GFX11: v_cmpx_ne_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_neq_f16_e64 +v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_nge_f16_e64 +v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_ngt_f16_e64 +v_cmpx_ne_i16 v255, v2 +// GFX11: v_cmpx_ne_i16_e64 v255, v2 ; encoding: 
[0x7e,0x00,0xb5,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_nle_f16_e64 +v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_nlg_f16_e64 +v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_nlt_f16_e64 +v_cmpx_ne_u16 v1, v255 +// GFX11: v_cmpx_ne_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_o_f16_e64 +v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_t_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_tru_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_ne_u16 v255, v2 +// GFX11: v_cmpx_ne_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_cmpx_u_f16_e64 +v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_class_f16_e64 +v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ne_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_eq_f16_e64 +v_cmpx_neq_f16 v1, v255 +// GFX11: v_cmpx_neq_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_eq_i16_e64 +v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_eq_u16_e64 +v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_f_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_f_f16_e64 +v_cmpx_neq_f16 v255, v2 +// GFX11: v_cmpx_neq_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8d,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ge_f16_e64 +v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ge_i16_e64 +v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_neq_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ge_u16_e64 +v_cmpx_nge_f16 v1, v255 +// GFX11: v_cmpx_nge_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_gt_f16_e64 +v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x00,0x89,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_gt_i16_e64 +v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_gt_u16_e64 +v_cmpx_nge_f16 v255, v2 +// GFX11: v_cmpx_nge_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x89,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_le_f16_e64 +v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_le_i16_e64 +v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nge_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_le_u16_e64 +v_cmpx_ngt_f16 v1, v255 +// GFX11: v_cmpx_ngt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lg_f16_e64 +v_cmpx_ngt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64 +v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_i16_e64 +v_cmpx_ngt_f16 v255, v2 +// GFX11: v_cmpx_ngt_f16_e64 v255, v2 ; encoding: 
[0x7e,0x00,0x8b,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_u16_e64 +v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ne_i16_e64 +v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_ngt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ne_u16_e64 +v_cmpx_nle_f16 v1, v255 +// GFX11: v_cmpx_nle_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_neq_f16_e64 +v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_nge_f16_e64 +v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ngt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ngt_f16_e64 +v_cmpx_nle_f16 v255, v2 +// GFX11: v_cmpx_nle_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8c,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_nle_f16_e64 +v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_nlg_f16_e64 +v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nle_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_nlt_f16_e64 +v_cmpx_nlg_f16 v1, v255 +// GFX11: v_cmpx_nlg_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_o_f16_e64 +v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_t_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_tru_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_nlg_f16 v255, v2 +// GFX11: v_cmpx_nlg_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8a,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_u_f16_e64 +v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_class_f16_e64 +v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_eq_f16_e64 +v_cmpx_nlt_f16 v1, v255 +// GFX11: v_cmpx_nlt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_eq_i16_e64 +v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x00,0x8e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_eq_u16_e64 +v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_f_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_f_f16_e64 +v_cmpx_nlt_f16 v255, v2 +// GFX11: v_cmpx_nlt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8e,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ge_f16_e64 +v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ge_i16_e64 +v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_nlt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ge_u16_e64 +v_cmpx_o_f16 v1, v255 +// GFX11: v_cmpx_o_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_gt_f16_e64 +v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_o_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_gt_i16_e64 +v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_o_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_gt_u16_e64 +v_cmpx_o_f16 v255, v2 +// GFX11: v_cmpx_o_f16_e64 v255, v2 ; encoding: 
[0x7e,0x00,0x87,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_le_f16_e64 +v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_o_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_le_i16_e64 +v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_o_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_le_u16_e64 +v_cmpx_t_f16 v1, v255 +// GFX11: v_cmpx_t_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lg_f16_e64 +v_cmpx_t_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_f16_e64 +v_cmpx_t_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_i16_e64 +v_cmpx_t_f16 v255, v2 +// GFX11: v_cmpx_t_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8f,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_lt_u16_e64 +v_cmpx_t_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8f,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ne_i16_e64 +v_cmpx_t_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x8f,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ne_u16_e64 +v_cmpx_tru_f16 v1, v255 +// GFX11: v_cmpx_t_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_neq_f16_e64 +v_cmpx_tru_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_nge_f16_e64 +v_cmpx_tru_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_ngt_f16_e64 +v_cmpx_tru_f16 v255, v2 +// GFX11: v_cmpx_t_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8f,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_nle_f16_e64 +v_cmpx_tru_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_t_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8f,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_nlg_f16_e64 +v_cmpx_tru_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_t_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_nlt_f16_e64 +v_cmpx_u_f16 v1, v255 +// GFX11: v_cmpx_u_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_o_f16_e64 +v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cmpx_u_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] 
-v_cmpx_t_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_u_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_tru_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_t_f16_e64 +v_cmpx_u_f16 v255, v2 +// GFX11: v_cmpx_u_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x88,0xd4,0xff,0x05,0x02,0x00] v_cmpx_u_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cmpx_u_f16_e64 +// GFX11: v_cmpx_u_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] +v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_cmpx_u_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c-fake16.s new file mode 100644 index 000000000000000..76db94023fc903b --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c-fake16.s @@ -0,0 +1,8695 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, v255, v2 +// W32: 
encoding: [0x05,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, s1, v2 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, s105, v255 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, vcc_lo, s2 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, vcc_hi, s105 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x6b,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, ttmp15, ttmp15 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7b,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, m0, src_scc +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7d,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x7d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x7d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 vcc_hi, 0.5, m0 +// W32: encoding: 
[0x6b,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], v255, v2 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], s1, v2 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], s105, v255 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], vcc_lo, s2 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], vcc_hi, s105 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x6b,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], ttmp15, ttmp15 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7b,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], m0, src_scc +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7d,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], exec_hi, null +// W64: 
encoding: [0x0a,0x00,0x7d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x7d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi +// GFX12: encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00] + +v_cmp_class_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x7e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x7e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x7e,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x7e,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x7e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x7e,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: 
[0x7a,0x00,0x7e,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64 null, -|0xaf123456|, vcc_hi +// GFX12: encoding: [0x7c,0x01,0x7e,0xd4,0xff,0xd6,0x00,0x20,0x56,0x34,0x12,0xaf] + +v_cmp_class_f64_e64 s5, v[1:2], v2 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, v[1:2], v255 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, v[1:2], s2 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, v[1:2], s105 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0xd3,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, v[254:255], ttmp15 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0xfe,0xf7,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, s[2:3], vcc_hi +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x02,0xd6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, s[104:105], vcc_lo +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x68,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, vcc, m0 +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x6a,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, ttmp[14:15], exec_hi +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x7a,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s5, exec, exec_lo +// W32: encoding: [0x05,0x00,0x7f,0xd4,0x7e,0xfc,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s105, null, null +// W32: encoding: [0x69,0x00,0x7f,0xd4,0x7c,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x7f,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 vcc_hi, 0.5, 0.5 +// W32: encoding: [0x6b,0x00,0x7f,0xd4,0xf0,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 ttmp15, -|src_scc|, src_scc +// W32: encoding: [0x7b,0x01,0x7f,0xd4,0xfd,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[1:2], v2 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[1:2], v255 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[1:2], s2 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[1:2], s105 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xd3,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], v[254:255], ttmp15 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0xfe,0xf7,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], s[2:3], vcc_hi +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x02,0xd6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], s[104:105], vcc_lo +// W64: encoding: 
[0x0a,0x00,0x7f,0xd4,0x68,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], vcc, m0 +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x6a,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], ttmp[14:15], exec_hi +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7a,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], exec, exec_lo +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7e,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[10:11], null, null +// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7c,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x7f,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 vcc, 0.5, 0.5 +// W64: encoding: [0x6a,0x00,0x7f,0xd4,0xf0,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 ttmp[14:15], -|src_scc|, src_scc +// W64: encoding: [0x7a,0x01,0x7f,0xd4,0xfd,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456 +// GFX12: encoding: [0x7c,0x00,0x7f,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_eq_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x02,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x02,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x02,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x02,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x02,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x02,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x02,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x02,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_eq_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x02,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_eq_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x12,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x12,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, m0, 0.5 +// W32: 
encoding: [0x05,0x00,0x12,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x12,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x12,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x12,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x12,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x12,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x12,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: 
[0x0a,0x00,0x12,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x12,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x12,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x12,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x12,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x12,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_eq_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: 
[0x05,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x22,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x22,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x22,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x22,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x22,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x22,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x22,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x22,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x22,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x22,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: 
encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x22,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x22,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x22,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x22,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x22,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x22,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_eq_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x22,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_eq_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x32,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x32,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x32,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x32,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x32,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x32,0xd4,0x7c,0xfc,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x32,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x32,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x32,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x32,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x32,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x32,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x32,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_eq_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x42,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, vcc_lo, ttmp15 +// 
W32: encoding: [0x05,0x00,0x42,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x42,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x42,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x42,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x42,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x42,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x42,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x42,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], v255, v255 +// W64: encoding: 
[0x0a,0x00,0x42,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x42,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 vcc, 0.5, m0 +// W64: 
encoding: [0x6a,0x00,0x42,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x42,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x42,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_eq_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x52,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x52,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x52,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x52,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x52,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x52,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x52,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x52,0xd4,0xc1,0x82,0x01,0x00] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x52,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x52,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 s[104:105], -1, -1 +// W64: encoding: 
[0x68,0x00,0x52,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x52,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x52,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x52,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_eq_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_eq_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3a,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3a,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3a,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3a,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], 
vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3a,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3a,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3a,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x3a,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_eq_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, v255, v255 +// W32: encoding: 
[0x05,0x00,0x4a,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4a,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4a,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4a,0xd4,0xf0,0xfa,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4a,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7f,0xf8,0x00,0x00] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4a,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4a,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4a,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x4a,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_eq_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_eq_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x5a,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5a,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5a,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5a,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5a,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5a,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5a,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5a,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x5a,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ge_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x06,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, vcc_hi, 0xfe0b +// W32: 
encoding: [0x05,0x00,0x06,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x06,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x06,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x06,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x06,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x06,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x06,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x06,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// 
W64: encoding: [0x7a,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_ge_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x16,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x16,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x16,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x16,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_ge_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x16,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x16,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x16,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x16,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x16,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x16,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x16,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x16,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x16,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_ge_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x26,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x26,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 
s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x26,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x26,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x26,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x26,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x26,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x26,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x26,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x26,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ge_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x26,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x26,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x26,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x26,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x26,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x26,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_ge_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x36,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, s1, s2 
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x36,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x36,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x36,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x36,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x36,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x36,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x36,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: 
[0x7b,0x00,0x36,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[10:11], null, exec_lo +// W64: encoding: 
[0x0a,0x00,0x36,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x36,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x36,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x36,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x36,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_ge_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x46,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x46,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_ge_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x46,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x46,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x46,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x46,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x46,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x46,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], 
vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x46,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x46,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x46,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x46,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ge_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: 
[0x05,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x56,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x56,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x56,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x56,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x56,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x56,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x56,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x56,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x56,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x56,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: 
[0x0a,0x00,0x56,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x56,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x56,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x56,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i64_e64 
null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x56,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ge_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_ge_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3e,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3e,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3e,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3e,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x3e,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_ge_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: 
[0x05,0x00,0x4e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4e,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4e,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], v255, v255 +// W64: encoding: 
[0x0a,0x00,0x4e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 vcc, 0.5, m0 +// W64: 
encoding: [0x6a,0x00,0x4e,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4e,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x4e,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ge_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x5e,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5e,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5e,0xd4,0xc1,0x82,0x01,0x00] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5e,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5e,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 s[104:105], -1, -1 +// W64: encoding: 
[0x68,0x00,0x5e,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5e,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5e,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x5e,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_gt_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x04,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x04,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x04,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_gt_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x04,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x04,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x04,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x04,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 
s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x04,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x04,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_gt_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, v255, v255 +// W32: encoding: 
[0x05,0x00,0x14,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x14,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x14,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x14,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x14,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x14,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x14,0xd4,0xf0,0xfa,0x00,0x40] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x14,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: 
[0x0a,0x01,0x14,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x14,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x14,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x14,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x14,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_gt_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x24,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x24,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x24,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x24,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: 
[0x05,0x00,0x24,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x24,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x24,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x24,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x24,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x24,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], 
ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x24,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x24,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x24,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x24,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x24,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x24,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_gt_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x34,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: 
[0x05,0x00,0x34,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x34,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x34,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x34,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x34,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x34,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x34,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x34,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x34,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], v255, v255 +// W64: encoding: 
[0x0a,0x00,0x34,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x34,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 vcc, 0.5, m0 +// W64: encoding: 
[0x6a,0x00,0x34,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x34,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x34,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_gt_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x44,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x44,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x44,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_gt_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x44,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x44,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x44,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x44,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x44,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_gt_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x44,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x44,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x44,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x44,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_gt_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x54,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, s[2:3], s[4:5] +// W32: 
encoding: [0x05,0x00,0x54,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x54,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x54,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x54,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x54,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x54,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x54,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x54,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x54,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x54,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], s[2:3], s[4:5] 
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x54,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x54,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x54,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x54,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_gt_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, v255, v255 +// W32: encoding: 
[0x05,0x00,0x3c,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3c,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3c,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3c,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3c,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3c,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3c,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3c,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x3c,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_gt_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, 
ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4c,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4c,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4c,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4c,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], s105, s105 +// W64: encoding: 
[0x0a,0x00,0x4c,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4c,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4c,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4c,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64 null, 0xaf123456, vcc_hi 
+// GFX12: encoding: [0x7c,0x00,0x4c,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_gt_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x5c,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5c,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5c,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5c,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5c,0xd4,0xfd,0xfc,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5c,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5c,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: 
[0x7a,0x00,0x5c,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x5c,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_le_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x03,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x03,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x03,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x03,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x03,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_le_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x03,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x03,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_le_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x03,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x03,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_le_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x13,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, s105, s105 +// W32: encoding: 
[0x05,0x00,0x13,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x13,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x13,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x13,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x13,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x13,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x13,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x13,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], v1, v2 +// W64: encoding: 
[0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x13,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 s[104:105], -1, exec_hi +// 
W64: encoding: [0x68,0x00,0x13,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x13,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x13,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x13,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_le_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x23,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x23,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x23,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x23,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x23,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x23,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s105, null, 0.5 +// W32: encoding: 
[0x69,0x00,0x23,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x23,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x23,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x23,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x23,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_le_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x23,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x23,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x23,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x23,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x23,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_le_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x33,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x33,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, ttmp15, src_scc +// W32: encoding: 
[0x05,0x00,0x33,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x33,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x33,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x33,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x33,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x33,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x33,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x33,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x33,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x33,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: 
[0x7c,0x00,0x33,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_le_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x43,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x43,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x43,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x43,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x43,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
+ +v_cmp_le_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x43,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x43,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x43,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_le_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x43,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x43,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x43,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x43,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_le_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x53,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x53,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x53,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: 
[0x05,0x00,0x53,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x53,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x53,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x53,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x53,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x53,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x53,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x53,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], vcc, ttmp[14:15] 
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x53,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x53,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x53,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x53,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_le_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3b,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3b,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3b,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3b,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_le_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3b,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_le_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3b,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3b,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x3b,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_le_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, exec_lo, -1 +// W32: encoding: 
[0x05,0x00,0x4b,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4b,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4b,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4b,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4b,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: 
[0x0a,0x00,0x4b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4b,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4b,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4b,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x4b,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_le_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0xfe,0xfd,0x03,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x5b,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5b,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5b,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5b,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5b,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0xfe,0xfd,0x03,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5b,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5b,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5b,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x5b,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_lg_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x05,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x05,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x05,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x05,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x05,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x05,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x05,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_lg_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_lg_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x05,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x05,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_lg_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x15,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: 
[0x05,0x00,0x15,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x15,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x15,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x15,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x15,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x15,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x15,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x15,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x15,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x15,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x15,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x15,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| 
+// W64: encoding: [0x7a,0x02,0x15,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x15,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_lg_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x25,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x25,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x25,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x25,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x25,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x25,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x25,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x25,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 vcc_hi, 0.5, null +// W32: encoding: 
[0x6b,0x00,0x25,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x25,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x25,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x25,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x25,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_lg_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x25,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x25,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x25,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_lt_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x01,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x01,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x01,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x01,0xd4,0x7e,0x82,0x01,0x00] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x01,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x01,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x01,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x01,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x01,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x01,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x01,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x01,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x01,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x01,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_lt_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x11,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_lt_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x11,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x11,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x11,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x11,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x11,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x11,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 ttmp15, 
-src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x11,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x11,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 
s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x11,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x11,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x11,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x11,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_lt_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x21,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x21,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x21,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x21,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x21,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s5, -|exec|, src_scc +// 
W32: encoding: [0x05,0x01,0x21,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x21,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x21,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x21,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x21,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_lt_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x21,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x21,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x21,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x21,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x21,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x21,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_lt_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x31,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: 
[0x05,0x00,0x31,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x31,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x31,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x31,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x31,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x31,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x31,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x31,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x31,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x31,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x31,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: 
encoding: [0x7a,0x00,0x31,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x31,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_lt_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x41,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x41,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x41,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x41,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_lt_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x41,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x41,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x41,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x41,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_lt_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x41,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x41,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x41,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x41,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_lt_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x51,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x51,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, s[104:105], s[104:105] +// W32: 
encoding: [0x05,0x00,0x51,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x51,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x51,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x51,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x51,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x51,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x51,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x51,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], s[104:105], 
s[104:105] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x51,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x51,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x51,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x51,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_lt_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x39,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x01,0x04,0x00,0x00] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x39,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x39,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x39,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x39,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x39,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x39,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x39,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x39,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_lt_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x39,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x39,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x39,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x39,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_lt_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x49,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x49,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, m0, 0.5 +// W32: encoding: 
[0x05,0x00,0x49,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x49,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x49,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x49,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x49,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x49,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x49,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x6a,0xf6,0x00,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x49,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x49,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x49,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x49,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_lt_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_lt_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x59,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x59,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x59,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x59,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x59,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x59,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x59,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x59,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x59,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x59,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x59,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x59,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x59,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u64_e64 null, 0xaf123456, vcc +// GFX12: encoding: 
[0x7c,0x00,0x59,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ne_i16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x35,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x35,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x35,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x35,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x35,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x35,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ne_i16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x35,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x35,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x35,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ne_i16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x35,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x35,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x35,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x35,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_ne_i32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x45,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x6a,0xf6,0x00,0x00] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x45,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x45,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x45,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x45,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x45,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x45,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x45,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x45,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x45,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x45,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x45,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ne_i64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x55,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x55,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x55,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x55,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x55,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s5, exec, src_scc +// W32: encoding: [0x05,0x00,0x55,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x55,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x55,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_ne_i64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x55,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x55,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x55,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x55,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x55,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x55,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ne_u16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, exec_lo, -1 +// W32: 
encoding: [0x05,0x00,0x3d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x3d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x3d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x3d,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x3d,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: 
[0x0a,0x00,0x3d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x3d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x3d,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x3d,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64 null, 0xfe0b, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x3d,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] + +v_cmp_ne_u32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s5, exec_hi, null +// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x4d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x4d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 vcc_hi, 0.5, m0 +// W32: encoding: [0x6b,0x00,0x4d,0xd4,0xf0,0xfa,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_ne_u32_e64 ttmp15, src_scc, vcc_lo +// W32: encoding: [0x7b,0x00,0x4d,0xd4,0xfd,0xd4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[10:11], exec_hi, null +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_ne_u32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x4d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 vcc, 0.5, m0 +// W64: encoding: [0x6a,0x00,0x4d,0xd4,0xf0,0xfa,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 ttmp[14:15], src_scc, vcc_lo +// W64: encoding: [0x7a,0x00,0x4d,0xd4,0xfd,0xd4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64 null, 0xaf123456, vcc_hi +// GFX12: encoding: [0x7c,0x00,0x4d,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_ne_u64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s5, 
exec, src_scc +// W32: encoding: [0x05,0x00,0x5d,0xd4,0x7e,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x5d,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x5d,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x5d,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 ttmp15, src_scc, exec +// W32: encoding: [0x7b,0x00,0x5d,0xd4,0xfd,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
+ +v_cmp_ne_u64_e64 s[10:11], exec, src_scc +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7e,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x5d,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x5d,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 ttmp[14:15], src_scc, exec +// W64: encoding: [0x7a,0x00,0x5d,0xd4,0xfd,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u64_e64 null, 0xaf123456, vcc +// GFX12: encoding: [0x7c,0x00,0x5d,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf] + +v_cmp_neq_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: 
[0x05,0x00,0x0d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 ttmp[14:15], -src_scc, 
|vcc_lo| +// W64: encoding: [0x7a,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_neq_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1d,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1d,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1d,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1d,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1d,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7b,0xfa,0x01,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1d,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1d,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1d,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1d,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x1d,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_neq_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_neq_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2d,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2d,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2d,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2d,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2d,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2d,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2d,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2d,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2d,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x2d,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_nge_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x09,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x09,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x09,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x09,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x09,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x09,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x09,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_nge_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x09,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x09,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_nge_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x19,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, 
ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x19,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x19,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x19,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x19,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x19,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x19,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x19,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], s105, s105 +// W64: encoding: 
[0x0a,0x00,0x19,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x19,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x19,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x19,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x19,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64 null, 
-|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x19,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_nge_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x29,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x29,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x29,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x29,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x29,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x29,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x29,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x29,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x29,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: 
encoding: [0x7b,0x03,0x29,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x29,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x29,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x29,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x29,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_nge_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x29,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x29,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_ngt_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s5, |exec_hi|, null +// W32: encoding: 
[0x05,0x01,0x0b,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0b,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0b,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], ttmp15, src_scc +// W64: 
encoding: [0x0a,0x00,0x0b,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0b,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0b,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_ngt_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1b,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1b,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1b,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1b,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1b,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1b,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[10:11], null, exec_lo +// W64: encoding: 
[0x0a,0x00,0x1b,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1b,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1b,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1b,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x1b,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_ngt_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s5, -|exec|, src_scc +// W32: encoding: 
[0x05,0x01,0x2b,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2b,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2b,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2b,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2b,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ngt_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2b,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2b,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2b,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2b,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x2b,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_nle_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: 
[0x05,0x00,0x0c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0c,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0c,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0c,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], s1, s2 +// W64: encoding: 
[0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0c,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0c,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 ttmp[14:15], -src_scc, 
|vcc_lo| +// W64: encoding: [0x7a,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_nle_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1c,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1c,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1c,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1c,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1c,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7b,0xfa,0x01,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1c,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1c,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1c,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1c,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x1c,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_nle_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_nle_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2c,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2c,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2c,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2c,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2c,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2c,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2c,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2c,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2c,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x2c,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_nlg_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x0a,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0a,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0a,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction + +v_cmp_nlg_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0a,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0a,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_nlg_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, 
ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1a,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1a,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1a,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1a,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1a,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], s105, s105 +// W64: encoding: 
[0x0a,0x00,0x1a,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1a,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1a,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1a,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1a,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64 null, 
-|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x1a,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_nlg_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x2a,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2a,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2a,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2a,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: 
encoding: [0x7b,0x03,0x2a,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2a,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2a,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2a,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_nlg_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2a,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x2a,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_nlt_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s5, |exec_hi|, null +// W32: encoding: 
[0x05,0x01,0x0e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x0e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x0e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], ttmp15, src_scc +// W64: 
encoding: [0x0a,0x00,0x0e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x0e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x0e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_nlt_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x1e,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x1e,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x1e,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x1e,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x1e,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x1e,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[10:11], null, exec_lo +// W64: encoding: 
[0x0a,0x00,0x1e,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x1e,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x1e,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x1e,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x1e,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_nlt_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x2e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s5, -|exec|, src_scc +// W32: encoding: 
[0x05,0x01,0x2e,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x2e,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x2e,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x2e,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x2e,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_nlt_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x2e,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x2e,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x2e,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x2e,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x2e,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_o_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x07,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: 
[0x05,0x00,0x07,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x07,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x07,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x07,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x07,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x07,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x07,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x07,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_o_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x17,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x17,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x17,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x17,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s105, null, 
exec_lo +// W32: encoding: [0x69,0x00,0x17,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x17,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x17,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x17,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], m0, 0.5 +// 
W64: encoding: [0x0a,0x00,0x17,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x17,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x17,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x17,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x17,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x17,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_o_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x27,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x27,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x27,0xd4,0x68,0xd0,0x00,0x00] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x27,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x27,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x27,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x27,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x27,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x27,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x27,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x27,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: 
[0x0a,0x00,0x27,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x27,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x27,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x27,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x27,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x27,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: [0x7c,0x82,0x27,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] + +v_cmp_u_f16_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x08,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, vcc_hi, 0xfe0b +// W32: encoding: [0x05,0x00,0x08,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x08,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, m0, 0.5 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x08,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x08,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x08,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x08,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction + +v_cmp_u_f16_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x6a,0xf6,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], vcc_hi, 0xfe0b +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x08,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_u_f16_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x08,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cmp_u_f32_e64 s5, v1, v2 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, v255, v255 +// W32: encoding: [0x05,0x00,0x18,0xd4,0xff,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, s1, s2 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x01,0x04,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, s105, s105 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x69,0xd2,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, vcc_lo, ttmp15 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x6a,0xf6,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, vcc_hi, 0xaf123456 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, ttmp15, src_scc +// W32: encoding: [0x05,0x00,0x18,0xd4,0x7b,0xfa,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, m0, 0.5 +// W32: encoding: 
[0x05,0x00,0x18,0xd4,0x7d,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, exec_lo, -1 +// W32: encoding: [0x05,0x00,0x18,0xd4,0x7e,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s5, |exec_hi|, null +// W32: encoding: [0x05,0x01,0x18,0xd4,0x7f,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s105, null, exec_lo +// W32: encoding: [0x69,0x00,0x18,0xd4,0x7c,0xfc,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 vcc_lo, -1, exec_hi +// W32: encoding: [0x6a,0x00,0x18,0xd4,0xc1,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 vcc_hi, 0.5, -m0 +// W32: encoding: [0x6b,0x00,0x18,0xd4,0xf0,0xfa,0x00,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 ttmp15, -src_scc, |vcc_lo| +// W32: encoding: [0x7b,0x02,0x18,0xd4,0xfd,0xd4,0x00,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], v1, v2 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], v255, v255 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0xff,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], s1, s2 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x01,0x04,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], s105, s105 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x69,0xd2,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], vcc_lo, ttmp15 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x6a,0xf6,0x00,0x00] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], vcc_hi, 0xaf123456 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], ttmp15, src_scc +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7b,0xfa,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], m0, 0.5 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7d,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], exec_lo, -1 +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7e,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], |exec_hi|, null +// W64: encoding: [0x0a,0x01,0x18,0xd4,0x7f,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[10:11], null, exec_lo +// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7c,0xfc,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 s[104:105], -1, exec_hi +// W64: encoding: [0x68,0x00,0x18,0xd4,0xc1,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 vcc, 0.5, -m0 +// W64: encoding: [0x6a,0x00,0x18,0xd4,0xf0,0xfa,0x00,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 ttmp[14:15], -src_scc, |vcc_lo| +// W64: encoding: [0x7a,0x02,0x18,0xd4,0xfd,0xd4,0x00,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp +// GFX12: encoding: [0x7c,0x83,0x18,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] + +v_cmp_u_f64_e64 s5, v[1:2], v[2:3] +// W32: encoding: [0x05,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, v[254:255], v[254:255] +// W32: encoding: [0x05,0x00,0x28,0xd4,0xfe,0xfd,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, s[2:3], s[4:5] +// W32: encoding: [0x05,0x00,0x28,0xd4,0x02,0x08,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, s[104:105], s[104:105] +// W32: encoding: [0x05,0x00,0x28,0xd4,0x68,0xd0,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, vcc, ttmp[14:15] +// W32: encoding: [0x05,0x00,0x28,0xd4,0x6a,0xf4,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, ttmp[14:15], 0xaf123456 +// W32: encoding: [0x05,0x00,0x28,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s5, -|exec|, src_scc +// W32: encoding: [0x05,0x01,0x28,0xd4,0x7e,0xfa,0x01,0x20] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s105, null, 0.5 +// W32: encoding: [0x69,0x00,0x28,0xd4,0x7c,0xe0,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 vcc_lo, -1, -1 +// W32: encoding: [0x6a,0x00,0x28,0xd4,0xc1,0x82,0x01,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 vcc_hi, 0.5, null +// W32: encoding: [0x6b,0x00,0x28,0xd4,0xf0,0xf8,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 ttmp15, -|src_scc|, -|exec| +// W32: encoding: [0x7b,0x03,0x28,0xd4,0xfd,0xfc,0x00,0x60] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], v[1:2], v[2:3] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], v[254:255], v[254:255] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0xfe,0xfd,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], s[2:3], s[4:5] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x02,0x08,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], s[104:105], s[104:105] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x68,0xd0,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], vcc, ttmp[14:15] +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x6a,0xf4,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], ttmp[14:15], 0xaf123456 +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], -|exec|, src_scc +// W64: encoding: [0x0a,0x01,0x28,0xd4,0x7e,0xfa,0x01,0x20] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[10:11], null, 0.5 +// W64: encoding: [0x0a,0x00,0x28,0xd4,0x7c,0xe0,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 s[104:105], -1, -1 +// W64: encoding: [0x68,0x00,0x28,0xd4,0xc1,0x82,0x01,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 vcc, 0.5, null +// W64: encoding: [0x6a,0x00,0x28,0xd4,0xf0,0xf8,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 ttmp[14:15], -|src_scc|, -|exec| +// W64: encoding: [0x7a,0x03,0x28,0xd4,0xfd,0xfc,0x00,0x60] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f64_e64 null, 0xaf123456, -|vcc| clamp +// GFX12: encoding: 
[0x7c,0x82,0x28,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s index 98dbbf6cff448b5..76db94023fc903b 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_e64 s5, v1, v2 // W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc-fake16.s new file mode 100644 index 000000000000000..ebac9fed92503ee --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc-fake16.s @@ -0,0 +1,9076 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: 
not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_e32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_class_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0xfa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_class_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0xfa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_class_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0xfc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0xfd,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_class_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0xfc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0xfd,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, v[1:2], v2 +// W32: encoding: [0x01,0x05,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, v[254:255], v2 +// W32: encoding: [0xfe,0x05,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, s[2:3], v2 +// W32: encoding: [0x02,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, s[104:105], v2 +// W32: encoding: [0x68,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, vcc, v2 +// W32: encoding: [0x6a,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, ttmp[14:15], v2 +// W32: encoding: [0x7a,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, exec, v2 +// W32: encoding: [0x7e,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0xfe,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, v[1:2], v2 +// W64: encoding: [0x01,0x05,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, v[254:255], v2 +// W64: encoding: [0xfe,0x05,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, s[2:3], v2 +// W64: encoding: [0x02,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, s[104:105], v2 +// W64: encoding: [0x68,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, vcc, v2 +// W64: encoding: [0x6a,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, ttmp[14:15], v2 +// W64: encoding: [0x7a,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, exec, v2 +// W64: encoding: [0x7e,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0xfe,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f64 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x04,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_eq_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x04,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, s105, v2 
+// W32: encoding: [0x69,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x24,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x25,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 +// W64: 
encoding: [0x01,0x05,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x24,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x25,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: 
[0xf0,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x44,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x45,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, 0.5, 
v[2:3] +// W64: encoding: [0xf0,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x44,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x45,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, 
exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x64,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x64,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, ttmp15, v2 +// W64: encoding: 
[0x7b,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x64,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x64,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x84,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x84,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x85,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x84,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_eq_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x84,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x85,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa5,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa4,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa5,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x74,0x7c] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x74,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x74,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x74,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x74,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_eq_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x94,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x95,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_eq_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x94,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x94,0x7c] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x95,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, src_scc, 
v[2:3] +// W32: encoding: [0xfd,0x04,0xb4,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_eq_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb4,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_ge_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x0c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, m0, v2 +// W64: 
encoding: [0x7d,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x0c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x2c,0x7c] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x2c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x2d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x2c,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x2c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ge_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x2d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x4c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x4c,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x4d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, src_scc, v[2:3] +// W64: encoding: 
[0xfd,0x04,0x4c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x4d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, null, v2 +// W32: encoding: 
[0x7c,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x6c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x6c,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x6c,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x6c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x6c,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x8c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x8d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_ge_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x8c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, 0xaf123456, v255 
+// W64: encoding: [0xff,0xfe,0x8d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xac,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_ge_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xad,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xac,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_ge_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xad,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x7c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x7c,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_ge_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x7c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x7c,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, vcc_hi, v2 +// 
W32: encoding: [0x6b,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x9c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x9d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, s1, v2 +// W64: encoding: 
[0x01,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x9c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x9d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xbc,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: 
encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xbc,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u64 vcc, 0xaf123456, 
v[254:255] +// W64: encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, -1, v2 +// W32: 
encoding: [0xc1,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x08,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x08,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x08,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x28,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x29,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_gt_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x28,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x29,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_gt_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x48,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x49,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x48,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: 
[0xff,0xfc,0x49,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, -1, v2 +// W32: encoding: 
[0xc1,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x68,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x68,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x68,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x68,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x68,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x88,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x89,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_gt_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x88,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x89,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 
vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa9,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa9,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x78,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x78,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_gt_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x78,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x78,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, 
ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x98,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x99,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, s105, v2 +// W64: 
encoding: [0x69,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x98,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x99,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb8,0x7c] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xb8,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, 
v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb8,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_le_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, 0.5, v2 +// 
W32: encoding: [0xf0,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x06,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x06,0x7c] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x06,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x26,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x26,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x27,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x26,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x27,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_le_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x46,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x47,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x46,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x46,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x47,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 +// W32: encoding: 
[0x01,0x05,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x66,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x66,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x66,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_le_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x66,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x66,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_le_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x86,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x87,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, 
vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x86,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x87,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, v[254:255], v[2:3] +// W32: 
encoding: [0xfe,0x05,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa7,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_le_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa7,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_le_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 
vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x76,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x76,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, null, v2 +// W64: encoding: 
[0x7c,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x76,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x76,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x96,0x7c] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x96,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x97,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x96,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x96,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x97,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb6,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xb6,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, v[254:255], v[2:3] +// 
W64: encoding: [0xfe,0x05,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb6,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, 
v127, v2 +// W32: encoding: [0x7f,0x05,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, src_scc, v2 +// W32: encoding: 
[0xfd,0x04,0x0a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x0a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x0a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x2a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x2b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_lg_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x2a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x2b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_lg_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x4a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x4b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x4a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x4b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x02,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x02,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x02,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_lt_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x02,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x02,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, exec_lo, 
v2 +// W32: encoding: [0x7e,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x22,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x23,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x22,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x23,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x42,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x43,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_lt_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x42,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x43,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_lt_i16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x62,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, 
0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x62,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x62,0x7c] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x62,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x62,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x82,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x82,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x83,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x82,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x83,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xa2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xa3,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xa2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xa3,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x72,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x72,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x72,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x72,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x72,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x92,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x92,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x93,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x92,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x93,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xb2,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xb3,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xb2,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xb3,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x6a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x6a,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x6a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x6a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x6a,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x8a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x8a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x8b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x8a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x8b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xaa,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xab,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xaa,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xab,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x7a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x7a,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x7a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x7a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x7a,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x9a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x9a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x9b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x9a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x9b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0xba,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, s[2:3], v[2:3] +// W64: encoding: 
[0x02,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0xba,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, s1, v2 +// W32: 
encoding: [0x01,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x1a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: 
[0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x1a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x1a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x3a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x3a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x3b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x3a,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x3a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x3b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x5a,0x7c] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x5a,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, s[2:3], 
v[2:3] +// W64: encoding: [0x02,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x5a,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_nge_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x12,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, 
0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, -1, v2 +// W64: encoding: 
[0xc1,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x12,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, exec_lo, v2 +// W32: encoding: 
[0x7e,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x32,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x33,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x32,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x33,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x52,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nge_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x52,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x16,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, -1, v2 
+// W64: encoding: [0xc1,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x16,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, exec_lo, v2 +// W32: 
encoding: [0x7e,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x36,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x37,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x36,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x37,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x56,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ngt_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x56,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x18,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nle_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, -1, v2 
+// W64: encoding: [0xc1,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x18,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, exec_lo, v2 +// W32: 
encoding: [0x7e,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x38,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x39,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x38,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x39,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x58,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nle_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x58,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x14,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, -1, v2 
+// W64: encoding: [0xc1,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x14,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, exec_lo, v2 +// W32: 
encoding: [0x7e,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x34,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x35,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x34,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x35,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x54,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nlg_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x54,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x1c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, -1, v2 
+// W64: encoding: [0xc1,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x1c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, exec_lo, v2 +// W32: 
encoding: [0x7e,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x3c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x3d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, vcc_hi, v2 +// W64: encoding: 
[0x6b,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x3c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x3d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: 
[0x02,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x5c,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_nlt_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x5c,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_o_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x0e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, 
0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x0e,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x0e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x2e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x2f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_o_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x2e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x2f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 
vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x4e,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_o_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x4e,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v127, v2 +// W32: encoding: [0x7f,0x05,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_u_f16 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x10,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, 0xfe0b, v127 +// W32: encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_u_f16 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v127, v2 +// W64: encoding: [0x7f,0x05,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x10,0x7c] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x10,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, 0xfe0b, v127 +// W64: encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 +// W32: encoding: [0x01,0x05,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v255, v2 +// W32: encoding: [0xff,0x05,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, s1, v2 +// W32: encoding: [0x01,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, s105, v2 +// W32: encoding: [0x69,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, vcc_lo, v2 +// W32: encoding: [0x6a,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, vcc_hi, v2 +// W32: encoding: [0x6b,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, ttmp15, v2 +// W32: encoding: [0x7b,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, m0, v2 +// W32: encoding: [0x7d,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, exec_lo, v2 +// W32: encoding: [0x7e,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, exec_hi, v2 +// W32: encoding: [0x7f,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, null, v2 +// W32: encoding: [0x7c,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, -1, v2 +// W32: encoding: [0xc1,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, 0.5, v2 +// W32: encoding: [0xf0,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, src_scc, v2 +// W32: encoding: [0xfd,0x04,0x30,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, 0xaf123456, v255 +// W32: encoding: [0xff,0xfe,0x31,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 +// W64: encoding: [0x01,0x05,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v255, v2 +// W64: encoding: [0xff,0x05,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, s1, v2 +// W64: encoding: [0x01,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, s105, v2 +// W64: encoding: [0x69,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, vcc_lo, v2 +// W64: encoding: [0x6a,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, vcc_hi, v2 +// W64: encoding: [0x6b,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, ttmp15, v2 +// W64: encoding: [0x7b,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_u_f32 vcc, m0, v2 +// W64: encoding: [0x7d,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, exec_lo, v2 +// W64: encoding: [0x7e,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, exec_hi, v2 +// W64: encoding: [0x7f,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, null, v2 +// W64: encoding: [0x7c,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, -1, v2 +// W64: encoding: [0xc1,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, 0.5, v2 +// W64: encoding: [0xf0,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, src_scc, v2 +// W64: encoding: [0xfd,0x04,0x30,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, 0xaf123456, v255 +// W64: encoding: [0xff,0xfe,0x31,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, v[1:2], v[2:3] +// W32: encoding: [0x01,0x05,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, v[254:255], v[2:3] +// W32: encoding: [0xfe,0x05,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, s[2:3], v[2:3] +// W32: encoding: [0x02,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, s[104:105], v[2:3] +// W32: encoding: [0x68,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_u_f64 vcc_lo, vcc, v[2:3] +// W32: encoding: [0x6a,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, ttmp[14:15], v[2:3] +// W32: encoding: [0x7a,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, exec, v[2:3] +// W32: encoding: [0x7e,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, null, v[2:3] +// W32: encoding: [0x7c,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, -1, v[2:3] +// W32: encoding: [0xc1,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, 0.5, v[2:3] +// W32: encoding: [0xf0,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, src_scc, v[2:3] +// W32: encoding: [0xfd,0x04,0x50,0x7c] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc_lo, 0xaf123456, v[254:255] +// W32: encoding: [0xff,0xfc,0x51,0x7c,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, v[1:2], v[2:3] +// W64: encoding: [0x01,0x05,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, v[254:255], v[2:3] +// W64: encoding: [0xfe,0x05,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, s[2:3], v[2:3] +// W64: encoding: [0x02,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, s[104:105], v[2:3] +// W64: encoding: [0x68,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_u_f64 vcc, vcc, v[2:3] +// W64: encoding: [0x6a,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, ttmp[14:15], v[2:3] +// W64: encoding: [0x7a,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, exec, v[2:3] +// W64: encoding: [0x7e,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, null, v[2:3] +// W64: encoding: [0x7c,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, -1, v[2:3] +// W64: encoding: [0xc1,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, 0.5, v[2:3] +// W64: encoding: [0xf0,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, src_scc, v[2:3] +// W64: encoding: [0xfd,0x04,0x50,0x7c] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f64 vcc, 0xaf123456, v[254:255] +// W64: encoding: [0xff,0xfc,0x51,0x7c,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s index c9241ebd161de47..4ae4f74ad21965e 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR 
--implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_e32 vcc_lo, v1, v2 // W32: encoding: [0x01,0x05,0xfa,0x7c] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16-fake16.s new file mode 100644 index 000000000000000..13c4f89cb70240c --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16-fake16.s @@ -0,0 +1,6052 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_class_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_class_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 
vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 
vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x25,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 
quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: 
[0xfa,0x04,0x24,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x25,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x64,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0xe4,0x00,0xff] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x64,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x85,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 
row_half_mirror +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v255, v255 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x85,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_ror:15 
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x74,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x74,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: 
[0xfa,0x04,0x94,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: 
[0xfa,0x04,0x94,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_eq_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: 
[0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x2d,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x2d,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x6c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x6c,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: 
encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x6c,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x8d,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: 
[0xfa,0x04,0x8c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x5f,0x01,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x8d,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: 
[0xfa,0x04,0x7c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x7c,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_ge_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x7c,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x01,0x01,0xff] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// 
W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: 
[0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_gt_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: 
[0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x68,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x68,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x88,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x89,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: 
encoding: [0xfa,0x04,0x88,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x89,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x78,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: 
[0xfa,0x04,0x78,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x5f,0x01,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x78,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: 
[0xfa,0x04,0x98,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: 
[0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x27,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x27,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: 
[0xfa,0x04,0x66,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: 
[0xfa,0x04,0x66,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x66,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_shr:15 +// 
W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x66,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x87,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x87,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x76,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x76,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_le_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: 
encoding: [0xfa,0x04,0x76,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x76,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: 
[0xfa,0x04,0x96,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x5f,0x01,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: 
[0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x2b,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: 
[0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x2b,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_lt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x02,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x02,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: 
[0xfa,0x04,0x22,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: 
[0xfa,0x04,0x22,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x23,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 
row_shr:15 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x23,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x62,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x62,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x82,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x83,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: 
encoding: [0xfa,0x04,0x82,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x83,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x72,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: 
[0xfa,0x04,0x72,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x5f,0x01,0x01] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x72,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: 
[0xfa,0x04,0x92,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x93,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_lt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x93,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x6a,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x01,0x01,0xff] 
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x6a,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// 
W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: 
[0xfa,0x04,0x8a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x8b,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_ne_i32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x8b,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x7a,0x7c,0x7f,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_shr:15 +// W64: encoding: 
[0xfa,0x04,0x7a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x7a,0x7c,0x7f,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_ne_u32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: 
[0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_share:0 
row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, 
v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: 
encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_mirror +// W64: encoding: 
[0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: 
encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_ror:1 +// 
W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: 
encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: 
[0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: 
[0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: 
[0xfa,0x04,0x36,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode + +v_cmp_ngt_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: 
[0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 
vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: 
[0xfa,0x04,0x34,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, 
v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_mirror 
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 
fi:0 +// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 
row_ror:1 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: 
encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_shl:15 
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 
row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff] +// 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: 
[0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_ror:1 +// W64: encoding: 
[0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_mirror +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_half_mirror +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_shl:1 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_shl:15 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_shr:1 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_shr:15 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_ror:1 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_ror:15 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0x31,0x7c,0xff,0x6f,0xf5,0x30] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_mirror +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_half_mirror +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_shl:1 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_shl:15 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_shr:1 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_shr:15 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_ror:1 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_ror:15 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_u_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0x31,0x7c,0xff,0x6f,0xf5,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s index 0c3a38626fa6c09..0c36108cb0cbe1c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s 
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0] // W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8-fake16.s new file mode 100644 index 000000000000000..87305ec913d1f3e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8-fake16.s @@ -0,0 +1,1300 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + 
+v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x24,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x25,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x24,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x25,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x64,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x64,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x64,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x64,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x64,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x64,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x84,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x84,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x85,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x84,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x84,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x85,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x74,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x74,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x74,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x74,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x74,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: 
[0xe9,0xfe,0x74,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x94,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x94,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x94,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x94,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x2d,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x2d,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x6c,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x6c,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x8d,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W64: encoding: [0xea,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x8d,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x7c,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x7c,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_ge_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: 
[0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x28,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x29,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x28,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x29,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x68,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x68,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x68,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_gt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x68,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x68,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x68,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x88,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x88,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x89,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x88,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x88,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x89,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x78,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x78,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x78,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x78,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x78,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x78,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x98,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x98,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x98,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W64: encoding: [0xea,0x04,0x98,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x26,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_le_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x27,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x26,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x27,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x66,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x66,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x66,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x66,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x66,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: 
[0xe9,0xfe,0x66,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x86,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x86,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x87,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x86,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x86,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x87,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x76,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x76,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x76,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_le_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x76,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x76,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x76,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x96,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x96,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x96,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x96,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x2b,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W64: encoding: [0xea,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x2b,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x02,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x02,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x02,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x02,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x02,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x02,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x22,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x22,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_lt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x23,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x22,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x22,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x23,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x62,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x62,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x62,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x62,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x62,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: 
[0xe9,0xfe,0x62,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x82,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x82,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x83,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x82,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x82,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x83,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x72,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x72,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+ +v_cmp_lt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x72,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x72,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x92,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x92,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x93,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x92,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x92,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_lt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x93,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x6a,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x6a,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x8b,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// W64: encoding: [0xea,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x8b,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x7a,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x7a,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode + +v_cmp_ne_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: 
[0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this 
GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x32,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x32,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05] 
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x36,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x36,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x38,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x38,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v127, v127 
dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x34,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x34,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] 
+// W32: encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode + +v_cmp_o_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x30,0x7c,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0x31,0x7c,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x30,0x7c,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cmp_u_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0x31,0x7c,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s index ceecbc660d06cb8..2a4095f99d834f2 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not 
llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s index ebdd8adc0adf99a..e603e7388a684e0 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s @@ -1,1774 +1,1775 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s v_cmp_class_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_class_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v127, v255 
quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_class_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:40: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:40: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_i16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or 
mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_i16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_i16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + 
+v_cmp_eq_i16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_i16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_eq_u16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand 
for instruction v_cmp_eq_u16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_u16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_eq_u16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] 
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_i16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_i16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_i16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_i16_e32 
vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ge_u16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction 
v_cmp_ge_u16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_u16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ge_u16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode 
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_gt_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_gt_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
:[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_gt_i16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: 
error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_gt_i16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_i16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_i16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_u16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_gt_u16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_gt_u16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_gt_u16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_u16_e32 vcc, v127, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_gt_u16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid 
operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc, vcc_hi, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc, vcc_lo, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_u16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_gt_u16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or 
mode v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction 
v_cmp_le_i16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_i16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_i16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + 
+v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_i16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_u16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid 
operand for instruction + +v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_le_u16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_u16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_le_u16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid 
for this GPU or mode v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lg_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lg_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// 
GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lg_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction 
v_cmp_lt_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + 
+v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_i16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid 
operand for instruction + +v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_i16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_i16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_i16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid 
for this GPU or mode v_cmp_lt_i16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_u16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_lt_u16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// 
GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_u16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_lt_u16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction 
v_cmp_ne_i16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction v_cmp_ne_i16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ne_i16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + 
+v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ne_i16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_ne_u16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction -v_cmp_ne_u16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 +// GFX12: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc, vcc_hi, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ne_u16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction v_cmp_ne_u16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_neq_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_neq_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16_e32 vcc_lo, v1, v255 
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_neq_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nge_f16_e32 
vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nge_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nge_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nge_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc_lo, 
v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_ngt_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this 
GPU or mode + +v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_ngt_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_ngt_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nle_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nle_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for 
this GPU or mode v_cmp_nle_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nle_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: 
invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nlg_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nlg_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nlg_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid 
for this GPU or mode + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nlt_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 +// GFX12: 
:[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction v_cmp_nlt_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction v_cmp_nlt_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction 
v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction v_cmp_o_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction v_cmp_o_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: 
operands are not valid for this GPU or mode v_cmp_o_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction v_cmp_o_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction + +v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_u_f16_e32 vcc, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: 
invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction v_cmp_u_f16_e32 vcc, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction + +v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction v_cmp_u_f16_e32 vcc, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_u_f16_e32 vcc, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmp_u_f16_e32 vcc_lo, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_u_f16_e32 vcc_lo, v127, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction -v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: 
error: invalid operand for instruction -v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, v127, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_class_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction -v_cmp_class_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction -v_cmp_eq_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, v128, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_eq_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction -v_cmp_eq_i16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction -v_cmp_eq_i16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmp_eq_u16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_eq_u16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_f16_e32 vcc, 
v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_i16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_i16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_u16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_u16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_i16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_i16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_u16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_u16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_i16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_i16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_u16_e32 vcc, v128, v2 -// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_u16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lg_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lg_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_i16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_i16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_u16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_u16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_i16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_i16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_u16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_u16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_neq_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_neq_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nge_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode - -v_cmp_nge_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ngt_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ngt_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nle_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nle_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlg_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlg_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlt_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlt_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_o_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_o_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16_e32 vcc_lo, v128, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, 
v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v127, v255 
quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v1, v255 
quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// 
GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: 
invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v128, 
v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand 
for instruction - -v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand 
for instruction - -v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid 
operand for instruction - -v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s index 65c0a3c874efbd0..e56c46bb55448c5 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s @@ -1,2368 +1,2369 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 
-mattr=+wavefrontsize32,+real-true16 %s 2>&1 > /dev/null | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 > /dev/null | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_cmp_class_f16 vcc, v1, v255 -// W64: v_cmp_class_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_class_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction + +v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction v_cmp_class_f16 vcc, v127, v255 -// W64: v_cmp_class_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_class_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction + +v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction + +v_cmp_class_f16 vcc, v128, v2 +// W64: v_cmp_class_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction + +v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction v_cmp_class_f16 vcc, vcc_hi, v255 -// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16 vcc, vcc_lo, v255 -// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16 vcc_lo, v127, v255 -// W32: v_cmp_class_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_class_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction + +v_cmp_class_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction + +v_cmp_class_f16 vcc_lo, v128, v2 +// W32: v_cmp_class_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_class_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction + +v_cmp_class_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction v_cmp_class_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_class_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: 
error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, v1, v255 -// W64: v_cmp_eq_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_f16 vcc, v127, v255 -// W64: v_cmp_eq_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_f16 vcc, v128, v2 +// W64: v_cmp_eq_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not 
valid for this GPU or mode + +v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_f16 vcc, vcc_hi, v255 -// W64: v_cmp_eq_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc, vcc_lo, v255 -// W64: v_cmp_eq_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc_lo, v1, v255 -// W32: v_cmp_eq_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_f16 vcc_lo, v127, v255 -// W32: v_cmp_eq_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_f16 vcc_lo, v128, v2 +// W32: v_cmp_eq_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode +// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16 vcc, v1, v255 -// W64: v_cmp_eq_i16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_i16 vcc, v127, v255 -// W64: v_cmp_eq_i16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid 
operand for instruction + +v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_i16 vcc, v128, v2 +// W64: v_cmp_eq_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x32,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_i16 vcc, vcc_hi, v255 -// W64: v_cmp_eq_i16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16 vcc, vcc_lo, v255 -// W64: v_cmp_eq_i16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16 vcc_lo, v1, v255 -// W32: v_cmp_eq_i16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_i16_e64 vcc_lo, v1, v255 ; encoding: 
[0x6a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_i16 vcc_lo, v127, v255 -// W32: v_cmp_eq_i16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_i16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_i16 vcc_lo, v128, v2 +// W32: v_cmp_eq_i16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x32,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_i16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_eq_i16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_i16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_i16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_eq_i16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_i16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x32,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc, v1, v255 -// W64: v_cmp_eq_u16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction 
v_cmp_eq_u16 vcc, v127, v255 -// W64: v_cmp_eq_u16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_u16 vcc, v128, v2 +// W64: v_cmp_eq_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3a,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_eq_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_u16 vcc, vcc_hi, v255 -// W64: v_cmp_eq_u16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16 
vcc, vcc_lo, v255 -// W64: v_cmp_eq_u16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_eq_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, v1, v255 -// W32: v_cmp_eq_u16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_u16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_u16 vcc_lo, v127, v255 -// W32: v_cmp_eq_u16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_u16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_u16 vcc_lo, v128, v2 +// W32: v_cmp_eq_u16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x3a,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_eq_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_eq_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_eq_u16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_eq_u16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_u16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_eq_u16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_eq_u16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_eq_u16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, v1, v255 -// W64: v_cmp_ge_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// 
W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_f16 vcc, v127, v255 -// W64: v_cmp_ge_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_f16 vcc, v128, v2 +// W64: v_cmp_ge_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_f16 vcc, vcc_hi, v255 -// W64: v_cmp_ge_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc, vcc_lo, v255 -// W64: v_cmp_ge_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc_lo, v1, v255 -// W32: v_cmp_ge_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_f16 vcc_lo, v127, v255 -// W32: v_cmp_ge_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00] +// 
W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_f16 vcc_lo, v128, v2 +// W32: v_cmp_ge_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: 
[0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16 vcc, v1, v255 -// W64: v_cmp_ge_i16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_i16 vcc, v127, v255 -// W64: v_cmp_ge_i16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_i16 vcc, v128, v2 +// W64: v_cmp_ge_i16_e64 vcc, v128, v2 ; encoding: 
[0x6a,0x00,0x36,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_i16 vcc, vcc_hi, v255 -// W64: v_cmp_ge_i16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16 vcc, vcc_lo, v255 -// W64: v_cmp_ge_i16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16 vcc_lo, v1, v255 -// W32: v_cmp_ge_i16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_i16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_i16 vcc_lo, v1, v255 
quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_i16 vcc_lo, v127, v255 -// W32: v_cmp_ge_i16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_i16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_i16 vcc_lo, v128, v2 +// W32: v_cmp_ge_i16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x36,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_i16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_ge_i16_e64 
vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_i16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_i16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_ge_i16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_i16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x36,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc, v1, v255 -// W64: v_cmp_ge_u16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_u16 vcc, v127, v255 -// W64: v_cmp_ge_u16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_u16 vcc, v128, v2 +// W64: v_cmp_ge_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3e,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ge_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_u16 vcc, vcc_hi, v255 -// W64: v_cmp_ge_u16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc, vcc_lo, v255 -// W64: v_cmp_ge_u16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ge_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, v1, v255 -// W32: v_cmp_ge_u16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for 
this GPU or mode +// W32: v_cmp_ge_u16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_u16 vcc_lo, v127, v255 -// W32: v_cmp_ge_u16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_u16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_u16 vcc_lo, v128, v2 +// W32: v_cmp_ge_u16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x3e,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ge_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v128, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ge_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ge_u16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_ge_u16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_u16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ge_u16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_ge_u16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ge_u16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc, v1, v255 -// W64: v_cmp_gt_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: 
error: invalid operand for instruction v_cmp_gt_f16 vcc, v127, v255 -// W64: v_cmp_gt_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_f16 vcc, v128, v2 +// W64: v_cmp_gt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_f16 vcc, vcc_hi, v255 -// W64: v_cmp_gt_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not 
valid for this GPU or mode v_cmp_gt_f16 vcc, vcc_lo, v255 -// W64: v_cmp_gt_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc_lo, v1, v255 -// W32: v_cmp_gt_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_f16 vcc_lo, v127, v255 -// W32: v_cmp_gt_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_f16 vcc_lo, v128, v2 +// W32: v_cmp_gt_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16 vcc, v1, v255 -// W64: v_cmp_gt_i16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + 
+v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_i16 vcc, v127, v255 -// W64: v_cmp_gt_i16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_i16 vcc, v128, v2 +// W64: v_cmp_gt_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x34,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_i16_e64_dpp vcc, v128, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_i16 vcc, vcc_hi, v255 -// W64: v_cmp_gt_i16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16 vcc, vcc_lo, v255 -// W64: v_cmp_gt_i16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16 vcc_lo, v1, v255 -// W32: v_cmp_gt_i16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_i16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_i16 vcc_lo, v127, v255 -// W32: v_cmp_gt_i16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_i16_e64 vcc_lo, v127, v255 ; encoding: 
[0x6a,0x00,0x34,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_i16 vcc_lo, v128, v2 +// W32: v_cmp_gt_i16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x34,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_i16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_gt_i16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_i16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_i16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_gt_i16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: 
v_cmp_gt_i16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x34,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc, v1, v255 -// W64: v_cmp_gt_u16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_gt_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cmp_gt_u16 vcc, v127, v255 -// W64: v_cmp_gt_u16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction -v_cmp_gt_u16 vcc, vcc_hi, v255 -// W64: v_cmp_gt_u16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction -v_cmp_gt_u16 vcc, vcc_lo, v255 -// W64: v_cmp_gt_u16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_u16 vcc, v127, v255 +// W64: v_cmp_gt_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cmp_gt_u16 vcc_lo, v1, v255 -// W32: v_cmp_gt_u16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp vcc, v127, 
v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_u16 vcc, v128, v2 +// W64: v_cmp_gt_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3c,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_gt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_u16 vcc, vcc_hi, v255 +// W64: v_cmp_gt_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc, vcc_lo, v255 +// W64: v_cmp_gt_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v255 +// W32: v_cmp_gt_u16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_u16 vcc_lo, v127, v255 -// W32: v_cmp_gt_u16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_u16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_u16 vcc_lo, v128, v2 +// W32: v_cmp_gt_u16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x3c,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_gt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_gt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_gt_u16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_gt_u16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_u16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_gt_u16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_gt_u16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_gt_u16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, v1, v255 -// W64: v_cmp_le_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_f16 vcc, v127, v255 -// W64: v_cmp_le_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: 
error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_f16 vcc, v128, v2 +// W64: v_cmp_le_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_f16 vcc, vcc_hi, v255 -// W64: v_cmp_le_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc, vcc_lo, v255 -// W64: v_cmp_le_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: 
operands are not valid for this GPU or mode v_cmp_le_f16 vcc_lo, v1, v255 -// W32: v_cmp_le_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_f16 vcc_lo, v127, v255 -// W32: v_cmp_le_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_f16 vcc_lo, v128, v2 +// W32: v_cmp_le_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: 
:[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_le_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_le_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16 vcc, v1, v255 -// W64: v_cmp_le_i16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: 
v_cmp_le_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_i16 vcc, v127, v255 -// W64: v_cmp_le_i16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_i16 vcc, v128, v2 +// W64: v_cmp_le_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x33,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_i16 vcc, vcc_hi, v255 -// W64: v_cmp_le_i16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode +// W64: v_cmp_le_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16 vcc, vcc_lo, v255 -// W64: v_cmp_le_i16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16 vcc_lo, v1, v255 -// W32: v_cmp_le_i16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_i16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_i16 vcc_lo, v127, v255 -// W32: v_cmp_le_i16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_i16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] 
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_i16 vcc_lo, v128, v2 +// W32: v_cmp_le_i16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x33,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_i16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_le_i16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_i16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_i16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_le_i16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_i16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x33,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc, v1, v255 -// W64: v_cmp_le_u16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: 
v_cmp_le_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_u16 vcc, v127, v255 -// W64: v_cmp_le_u16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_u16 vcc, v128, v2 +// W64: v_cmp_le_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3b,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_le_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_u16 vcc, vcc_hi, v255 -// W64: v_cmp_le_u16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc, vcc_lo, v255 -// W64: v_cmp_le_u16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_le_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, v1, v255 -// W32: v_cmp_le_u16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_u16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction 
v_cmp_le_u16 vcc_lo, v127, v255 -// W32: v_cmp_le_u16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_u16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_u16 vcc_lo, v128, v2 +// W32: v_cmp_le_u16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x3b,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_le_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_le_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_le_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_le_u16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_le_u16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_u16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands 
are not valid for this GPU or mode v_cmp_le_u16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_le_u16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_le_u16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, v1, v255 -// W64: v_cmp_lg_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lg_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lg_f16 vcc, v127, v255 -// W64: v_cmp_lg_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lg_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lg_f16 vcc, v128, v2 +// W64: v_cmp_lg_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lg_f16 vcc, vcc_hi, v255 -// W64: v_cmp_lg_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lg_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc, vcc_lo, v255 -// W64: v_cmp_lg_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lg_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc_lo, v1, v255 -// W32: v_cmp_lg_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lg_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: 
v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lg_f16 vcc_lo, v127, v255 -// W32: v_cmp_lg_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lg_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lg_f16 vcc_lo, v128, v2 +// W32: v_cmp_lg_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lg_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lg_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16 vcc, v1, v255 -// W64: v_cmp_lt_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_f16 vcc, v127, v255 -// W64: v_cmp_lt_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_f16_e64 vcc, v127, v255 ; encoding: 
[0x6a,0x00,0x01,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_f16 vcc, v128, v2 +// W64: v_cmp_lt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x01,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_f16 vcc, vcc_hi, v255 -// W64: v_cmp_lt_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16 vcc, vcc_lo, v255 -// W64: v_cmp_lt_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_f16_e64 vcc, vcc_lo, v255 ; encoding: 
[0x6a,0x00,0x01,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16 vcc_lo, v1, v255 -// W32: v_cmp_lt_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_f16 vcc_lo, v127, v255 -// W32: v_cmp_lt_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_f16 vcc_lo, v128, v2 +// W32: v_cmp_lt_f16_e64 vcc_lo, 
v128, v2 ; encoding: [0x6a,0x00,0x01,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_lt_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_lt_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x01,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16 vcc, v1, v255 -// W64: v_cmp_lt_i16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + 
+v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_i16 vcc, v127, v255 -// W64: v_cmp_lt_i16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_i16 vcc, v128, v2 +// W64: v_cmp_lt_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x31,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_i16 vcc, vcc_hi, v255 -// W64: v_cmp_lt_i16_e64 vcc, vcc_hi, v255 
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16 vcc, vcc_lo, v255 -// W64: v_cmp_lt_i16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16 vcc_lo, v1, v255 -// W32: v_cmp_lt_i16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_i16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_i16 vcc_lo, v127, v255 -// W32: v_cmp_lt_i16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_i16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_i16 vcc_lo, v128, v2 +// W32: v_cmp_lt_i16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x31,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_i16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_lt_i16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_i16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_i16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_lt_i16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_i16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x31,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc, v1, v255 -// W64: v_cmp_lt_u16_e64 vcc, v1, v255 -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_u16 vcc, v127, v255 -// W64: v_cmp_lt_u16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_u16 vcc, v128, v2 +// W64: v_cmp_lt_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x39,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp vcc, v128, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_lt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_u16 vcc, vcc_hi, v255 -// W64: v_cmp_lt_u16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc, vcc_lo, v255 -// W64: v_cmp_lt_u16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_lt_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, v1, v255 -// W32: v_cmp_lt_u16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_u16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: 
invalid operand for instruction v_cmp_lt_u16 vcc_lo, v127, v255 -// W32: v_cmp_lt_u16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_u16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_u16 vcc_lo, v128, v2 +// W32: v_cmp_lt_u16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x39,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_lt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_lt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_lt_u16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_lt_u16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_u16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: 
:[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_lt_u16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_lt_u16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_lt_u16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x39,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16 vcc, v1, v255 -// W64: v_cmp_ne_i16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ne_i16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ne_i16 vcc, v127, v255 -// W64: v_cmp_ne_i16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ne_i16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp vcc, v127, v255 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_i16 vcc, v128, v2 +// W64: v_cmp_ne_i16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x35,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ne_i16 vcc, vcc_hi, v255 -// W64: v_cmp_ne_i16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ne_i16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16 vcc, vcc_lo, v255 -// W64: v_cmp_ne_i16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ne_i16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16 vcc_lo, v1, v255 -// W32: v_cmp_ne_i16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ne_i16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ne_i16 vcc_lo, v127, v255 -// W32: v_cmp_ne_i16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ne_i16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_i16 vcc_lo, v128, v2 +// W32: v_cmp_ne_i16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x35,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_i16_e64_dpp 
vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ne_i16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_ne_i16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ne_i16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_i16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_ne_i16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ne_i16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x35,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc, v1, v255 -// W64: v_cmp_ne_u16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ne_u16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cmp_ne_u16 vcc, v127, v255 -// W64: v_cmp_ne_u16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction -v_cmp_ne_u16 vcc, vcc_hi, v255 -// W64: v_cmp_ne_u16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_u16 vcc, v127, v255 +// W64: v_cmp_ne_u16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_u16 vcc, v128, v2 +// W64: v_cmp_ne_u16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x3d,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ne_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_u16 vcc, vcc_hi, v255 +// W64: v_cmp_ne_u16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc, vcc_lo, v255 -// W64: v_cmp_ne_u16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid 
for this GPU or mode +// W64: v_cmp_ne_u16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, v1, v255 -// W32: v_cmp_ne_u16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ne_u16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ne_u16 vcc_lo, v127, v255 -// W32: v_cmp_ne_u16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ne_u16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for 
instruction + +v_cmp_ne_u16 vcc_lo, v128, v2 +// W32: v_cmp_ne_u16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x3d,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ne_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction + +v_cmp_ne_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction v_cmp_ne_u16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_ne_u16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ne_u16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ne_u16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_ne_u16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ne_u16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, v1, v255 -// W64: v_cmp_neq_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_neq_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_neq_f16 vcc, v127, v255 -// W64: v_cmp_neq_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_neq_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_neq_f16 vcc, v128, v2 +// W64: v_cmp_neq_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_neq_f16 vcc, vcc_hi, v255 -// W64: v_cmp_neq_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_neq_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc, vcc_lo, v255 -// W64: v_cmp_neq_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_neq_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc_lo, v1, v255 -// W32: v_cmp_neq_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_neq_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_neq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_neq_f16 vcc_lo, v127, v255 -// W32: v_cmp_neq_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_neq_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00] +// 
W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_neq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_neq_f16 vcc_lo, v128, v2 +// W32: v_cmp_neq_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_neq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_neq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_neq_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_neq_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_lo, v255 ; 
encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, v1, v255 -// W64: v_cmp_nge_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nge_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nge_f16 vcc, v127, v255 -// W64: v_cmp_nge_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nge_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nge_f16 vcc, v128, v2 +// W64: v_cmp_nge_f16_e64 vcc, v128, v2 ; encoding: 
[0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nge_f16 vcc, vcc_hi, v255 -// W64: v_cmp_nge_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nge_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc, vcc_lo, v255 -// W64: v_cmp_nge_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nge_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc_lo, v1, v255 -// W32: v_cmp_nge_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nge_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nge_f16 
vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nge_f16 vcc_lo, v127, v255 -// W32: v_cmp_nge_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nge_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nge_f16 vcc_lo, v128, v2 +// W32: v_cmp_nge_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nge_f16 vcc_lo, vcc_hi, 
v255 -// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nge_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, v1, v255 -// W64: v_cmp_ngt_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ngt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_ngt_f16 vcc, v127, v255 -// W64: v_cmp_ngt_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ngt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, 
v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_ngt_f16 vcc, v128, v2 +// W64: v_cmp_ngt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_ngt_f16 vcc, vcc_hi, v255 -// W64: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc, vcc_lo, v255 -// W64: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc_lo, v1, v255 -// W32: v_cmp_ngt_f16_e64 vcc_lo, v1, v255 -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ngt_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_ngt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_ngt_f16 vcc_lo, v127, v255 -// W32: v_cmp_ngt_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ngt_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_ngt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_ngt_f16 vcc_lo, v128, v2 +// W32: v_cmp_ngt_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_ngt_f16 vcc_lo, v128, v2 
dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_ngt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_ngt_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_ngt_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, v1, v255 -// W64: v_cmp_nle_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nle_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nle_f16 vcc, v127, v255 -// W64: v_cmp_nle_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nle_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nle_f16 vcc, v128, v2 +// W64: v_cmp_nle_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nle_f16 vcc, vcc_hi, v255 -// W64: v_cmp_nle_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nle_f16_e64 vcc, vcc_hi, v255 ; 
encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc, vcc_lo, v255 -// W64: v_cmp_nle_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nle_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc_lo, v1, v255 -// W32: v_cmp_nle_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nle_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nle_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nle_f16 vcc_lo, v127, v255 -// W32: v_cmp_nle_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nle_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for 
instruction + +v_cmp_nle_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nle_f16 vcc_lo, v128, v2 +// W32: v_cmp_nle_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nle_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nle_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nle_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nle_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, v1, v255 -// W64: v_cmp_nlg_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nlg_f16_e64 vcc, v1, v255 ; 
encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nlg_f16 vcc, v127, v255 -// W64: v_cmp_nlg_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nlg_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlg_f16 vcc, v128, v2 +// W64: v_cmp_nlg_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nlg_f16 vcc, vcc_hi, v255 -// W64: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc, vcc_lo, v255 -// W64: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nlg_f16 vcc_lo, v1, v255 -// W32: v_cmp_nlg_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nlg_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nlg_f16 vcc_lo, v127, v255 -// W32: v_cmp_nlg_f16_e64 
vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nlg_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlg_f16 vcc_lo, v128, v2 +// W32: v_cmp_nlg_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nlg_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode 
v_cmp_nlg_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, v1, v255 -// W64: v_cmp_nlt_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nlt_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nlt_f16 vcc, v127, v255 -// W64: v_cmp_nlt_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nlt_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlt_f16 vcc, v128, v2 +// W64: v_cmp_nlt_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nlt_f16 vcc, vcc_hi, v255 -// W64: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc, vcc_lo, v255 -// W64: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc_lo, v1, v255 -// W32: v_cmp_nlt_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nlt_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: 
v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nlt_f16 vcc_lo, v127, v255 -// W32: v_cmp_nlt_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nlt_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlt_f16 vcc_lo, v128, v2 +// W32: v_cmp_nlt_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_nlt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction + +v_cmp_nlt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, 
v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction v_cmp_nlt_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_nlt_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, v1, v255 -// W64: v_cmp_o_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_o_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction v_cmp_o_f16 vcc, v127, v255 -// W64: v_cmp_o_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_o_f16_e64 vcc, v127, v255 ; encoding: 
[0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_o_f16 vcc, v128, v2 +// W64: v_cmp_o_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction v_cmp_o_f16 vcc, vcc_hi, v255 -// W64: v_cmp_o_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_o_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc, vcc_lo, v255 -// W64: v_cmp_o_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_o_f16_e64 vcc, vcc_lo, v255 ; encoding: 
[0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc_lo, v1, v255 -// W32: v_cmp_o_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_o_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_o_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction v_cmp_o_f16 vcc_lo, v127, v255 -// W32: v_cmp_o_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_o_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_o_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_o_f16 vcc_lo, v128, v2 +// W32: v_cmp_o_f16_e64 vcc_lo, v128, v2 ; 
encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_o_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_o_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction v_cmp_o_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_o_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_o_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_o_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_o_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_o_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, v1, v255 -// W64: v_cmp_u_f16_e64 vcc, v1, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_u_f16_e64 vcc, v1, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_u_f16 vcc, v1, v255 
quad_perm:[3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction v_cmp_u_f16 vcc, v127, v255 -// W64: v_cmp_u_f16_e64 vcc, v127, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_u_f16_e64 vcc, v127, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_u_f16 vcc, v128, v2 +// W64: v_cmp_u_f16_e64 vcc, v128, v2 ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode + +v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction + +v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0] +// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction v_cmp_u_f16 vcc, vcc_hi, v255 -// W64: v_cmp_u_f16_e64 vcc, vcc_hi, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
operands are not valid for this GPU or mode +// W64: v_cmp_u_f16_e64 vcc, vcc_hi, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc, vcc_lo, v255 -// W64: v_cmp_u_f16_e64 vcc, vcc_lo, v255 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W64: v_cmp_u_f16_e64 vcc, vcc_lo, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00] +// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode v_cmp_u_f16 vcc_lo, v1, v255 -// W32: v_cmp_u_f16_e64 vcc_lo, v1, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// W32: v_cmp_u_f16_e64 vcc_lo, v1, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cmp_u_f16 vcc_lo, v127, v255 -// W32: v_cmp_u_f16_e64 vcc_lo, v127, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction -v_cmp_u_f16 vcc_lo, vcc_hi, v255 -// W32: v_cmp_u_f16_e64 vcc_lo, vcc_hi, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction -v_cmp_u_f16 vcc_lo, vcc_lo, v255 -// W32: v_cmp_u_f16_e64 vcc_lo, vcc_lo, v255 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16 vcc_lo, v127, v255 +// W32: 
v_cmp_u_f16_e64 vcc_lo, v127, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cmp_class_f16 vcc, v128, v2 -// W64: v_cmp_class_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction -v_cmp_class_f16 vcc_lo, v128, v2 -// W32: v_cmp_class_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction -v_cmp_eq_f16 vcc, v128, v2 -// W64: v_cmp_eq_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16 vcc_lo, v128, v2 +// W32: v_cmp_u_f16_e64 vcc_lo, v128, v2 ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cmp_eq_f16 vcc_lo, v128, v2 -// W32: v_cmp_eq_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction -v_cmp_eq_i16 vcc, v128, v2 -// W64: v_cmp_eq_i16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU 
or mode +v_cmp_u_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] +// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction -v_cmp_eq_i16 vcc_lo, v128, v2 -// W32: v_cmp_eq_i16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmp_u_f16 vcc_lo, vcc_hi, v255 +// W32: v_cmp_u_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode -v_cmp_eq_u16 vcc, v128, v2 -// W64: v_cmp_eq_u16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_eq_u16 vcc_lo, v128, v2 -// W32: v_cmp_eq_u16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_f16 vcc, v128, v2 -// W64: v_cmp_ge_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_f16 vcc_lo, v128, v2 -// W32: v_cmp_ge_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_i16 vcc, v128, v2 -// W64: v_cmp_ge_i16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_i16 vcc_lo, v128, v2 -// W32: v_cmp_ge_i16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_u16 vcc, v128, v2 -// W64: v_cmp_ge_u16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ge_u16 vcc_lo, v128, v2 -// W32: v_cmp_ge_u16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_f16 vcc, v128, v2 -// 
W64: v_cmp_gt_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_f16 vcc_lo, v128, v2 -// W32: v_cmp_gt_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_i16 vcc, v128, v2 -// W64: v_cmp_gt_i16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_i16 vcc_lo, v128, v2 -// W32: v_cmp_gt_i16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_u16 vcc, v128, v2 -// W64: v_cmp_gt_u16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_gt_u16 vcc_lo, v128, v2 -// W32: v_cmp_gt_u16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_f16 vcc, v128, v2 -// W64: v_cmp_le_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_f16 vcc_lo, v128, v2 -// W32: v_cmp_le_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_i16 vcc, v128, v2 -// W64: v_cmp_le_i16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_i16 vcc_lo, v128, v2 -// W32: v_cmp_le_i16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_u16 vcc, v128, v2 -// W64: v_cmp_le_u16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_le_u16 vcc_lo, v128, v2 -// W32: v_cmp_le_u16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lg_f16 vcc, v128, v2 -// W64: v_cmp_lg_f16_e64 vcc, v128, v2 -// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lg_f16 vcc_lo, v128, v2 -// W32: v_cmp_lg_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_f16 vcc, v128, v2 -// W64: v_cmp_lt_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_f16 vcc_lo, v128, v2 -// W32: v_cmp_lt_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_i16 vcc, v128, v2 -// W64: v_cmp_lt_i16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_i16 vcc_lo, v128, v2 -// W32: v_cmp_lt_i16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_u16 vcc, v128, v2 -// W64: v_cmp_lt_u16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_lt_u16 vcc_lo, v128, v2 -// W32: v_cmp_lt_u16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_i16 vcc, v128, v2 -// W64: v_cmp_ne_i16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_i16 vcc_lo, v128, v2 -// W32: v_cmp_ne_i16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_u16 vcc, v128, v2 -// W64: v_cmp_ne_u16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ne_u16 vcc_lo, v128, v2 -// W32: v_cmp_ne_u16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_neq_f16 vcc, v128, v2 -// W64: v_cmp_neq_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not 
valid for this GPU or mode - -v_cmp_neq_f16 vcc_lo, v128, v2 -// W32: v_cmp_neq_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nge_f16 vcc, v128, v2 -// W64: v_cmp_nge_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nge_f16 vcc_lo, v128, v2 -// W32: v_cmp_nge_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ngt_f16 vcc, v128, v2 -// W64: v_cmp_ngt_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_ngt_f16 vcc_lo, v128, v2 -// W32: v_cmp_ngt_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nle_f16 vcc, v128, v2 -// W64: v_cmp_nle_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nle_f16 vcc_lo, v128, v2 -// W32: v_cmp_nle_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlg_f16 vcc, v128, v2 -// W64: v_cmp_nlg_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlg_f16 vcc_lo, v128, v2 -// W32: v_cmp_nlg_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlt_f16 vcc, v128, v2 -// W64: v_cmp_nlt_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_nlt_f16 vcc_lo, v128, v2 -// W32: v_cmp_nlt_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_o_f16 vcc, v128, v2 -// W64: v_cmp_o_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - 
-v_cmp_o_f16 vcc_lo, v128, v2 -// W32: v_cmp_o_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16 vcc, v128, v2 -// W64: v_cmp_u_f16_e64 vcc, v128, v2 -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_u_f16 vcc_lo, v128, v2 -// W32: v_cmp_u_f16_e64 vcc_lo, v128, v2 -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_le_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_le_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_le_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_le_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_le_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_le_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_lg_f16_e64_dpp vcc, v1, 
v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_i16_e64_dpp vcc, 
v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ne_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: 
v_cmp_ne_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ne_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc_lo, v127, v255 
quad_perm:[3,2,1,0] -// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction - -v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0] -// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction - -v_cmp_u_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0] -// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_eq_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction - -v_cmp_eq_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_ge_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid 
operand for instruction - -v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_gt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_le_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_le_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_le_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_le_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_le_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_lt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_ne_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_ne_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_ngt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction - -v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0] -// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0] -// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: 
v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc, v1, v255 
dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 
vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction - -v_cmp_lt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ne_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ne_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 
dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// 
W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc, 
v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - 
-v_cmp_u_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_class_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_eq_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_eq_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_ge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ge_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ge_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_gt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_gt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_le_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_le_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_le_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction - -v_cmp_lt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_lt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_lt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ne_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ne_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ne_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand 
for instruction - -v_cmp_neq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_ngt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nle_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: 
error: invalid operand for instruction - -v_cmp_nlt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_o_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction - -v_cmp_u_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] -// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16 vcc_lo, vcc_lo, v255 +// W32: v_cmp_u_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00] +// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx-fake16.s new file mode 100644 index 000000000000000..a5b673494f134b9 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx-fake16.s @@ -0,0 +1,3404 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_cmpx_class_f16_e32 v1, v2 +// GFX12: encoding: [0x01,0x05,0xfa,0x7d] + +v_cmpx_class_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0xfa,0x7d] + +v_cmpx_class_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0xfa,0x7d] + 
+v_cmpx_class_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0xfa,0x7d] + +v_cmpx_class_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0xfa,0x7d] + +v_cmpx_class_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0xfa,0x7d] + +v_cmpx_class_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0xfa,0x7d] + +v_cmpx_class_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0xfa,0x7d] + +v_cmpx_class_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0xfa,0x7d] + +v_cmpx_class_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0xfa,0x7d] + +v_cmpx_class_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0xfa,0x7d] + +v_cmpx_class_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0xfa,0x7d] + +v_cmpx_class_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0xfa,0x7d] + +v_cmpx_class_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0xfa,0x7d] + +v_cmpx_class_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfa,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_class_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0xfc,0x7d] + +v_cmpx_class_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0xfc,0x7d] + +v_cmpx_class_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0xfc,0x7d] + +v_cmpx_class_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0xfc,0x7d] + +v_cmpx_class_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0xfc,0x7d] + +v_cmpx_class_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0xfc,0x7d] + +v_cmpx_class_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0xfc,0x7d] + +v_cmpx_class_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0xfc,0x7d] + +v_cmpx_class_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0xfc,0x7d] + +v_cmpx_class_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0xfc,0x7d] + +v_cmpx_class_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0xfc,0x7d] + +v_cmpx_class_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0xfc,0x7d] + +v_cmpx_class_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0xfc,0x7d] + +v_cmpx_class_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0xfc,0x7d] + +v_cmpx_class_f32 0xaf123456, v255 +// GFX12: encoding: 
[0xff,0xfe,0xfd,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_class_f64 v[1:2], v2 +// GFX12: encoding: [0x01,0x05,0xfe,0x7d] + +v_cmpx_class_f64 v[254:255], v2 +// GFX12: encoding: [0xfe,0x05,0xfe,0x7d] + +v_cmpx_class_f64 s[2:3], v2 +// GFX12: encoding: [0x02,0x04,0xfe,0x7d] + +v_cmpx_class_f64 s[104:105], v2 +// GFX12: encoding: [0x68,0x04,0xfe,0x7d] + +v_cmpx_class_f64 vcc, v2 +// GFX12: encoding: [0x6a,0x04,0xfe,0x7d] + +v_cmpx_class_f64 ttmp[14:15], v2 +// GFX12: encoding: [0x7a,0x04,0xfe,0x7d] + +v_cmpx_class_f64 exec, v2 +// GFX12: encoding: [0x7e,0x04,0xfe,0x7d] + +v_cmpx_class_f64 null, v2 +// GFX12: encoding: [0x7c,0x04,0xfe,0x7d] + +v_cmpx_class_f64 -1, v2 +// GFX12: encoding: [0xc1,0x04,0xfe,0x7d] + +v_cmpx_class_f64 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0xfe,0x7d] + +v_cmpx_class_f64 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0xfe,0x7d] + +v_cmpx_class_f64 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x04,0x7d] + +v_cmpx_eq_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x04,0x7d] + +v_cmpx_eq_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x04,0x7d] + +v_cmpx_eq_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x04,0x7d] + +v_cmpx_eq_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x04,0x7d] + +v_cmpx_eq_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x04,0x7d] + +v_cmpx_eq_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x04,0x7d] + +v_cmpx_eq_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x04,0x7d] + +v_cmpx_eq_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x04,0x7d] + +v_cmpx_eq_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x04,0x7d] + +v_cmpx_eq_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x04,0x7d] + +v_cmpx_eq_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x04,0x7d] + +v_cmpx_eq_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x04,0x7d] + +v_cmpx_eq_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x04,0x7d] + +v_cmpx_eq_f16 0xfe0b, v127 +// GFX12: encoding: 
[0xff,0xfe,0x04,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x24,0x7d] + +v_cmpx_eq_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x24,0x7d] + +v_cmpx_eq_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x24,0x7d] + +v_cmpx_eq_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x24,0x7d] + +v_cmpx_eq_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x24,0x7d] + +v_cmpx_eq_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x24,0x7d] + +v_cmpx_eq_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x24,0x7d] + +v_cmpx_eq_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x24,0x7d] + +v_cmpx_eq_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x24,0x7d] + +v_cmpx_eq_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x24,0x7d] + +v_cmpx_eq_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x24,0x7d] + +v_cmpx_eq_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x24,0x7d] + +v_cmpx_eq_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x24,0x7d] + +v_cmpx_eq_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x24,0x7d] + +v_cmpx_eq_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x25,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x44,0x7d] + +v_cmpx_eq_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x44,0x7d] + +v_cmpx_eq_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x44,0x7d] + +v_cmpx_eq_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x44,0x7d] + +v_cmpx_eq_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x44,0x7d] + +v_cmpx_eq_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x44,0x7d] + +v_cmpx_eq_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x44,0x7d] + +v_cmpx_eq_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x44,0x7d] + +v_cmpx_eq_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x44,0x7d] + +v_cmpx_eq_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x44,0x7d] + +v_cmpx_eq_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x44,0x7d] + +v_cmpx_eq_f64 0xaf123456, v[254:255] +// GFX12: encoding: 
[0xff,0xfc,0x45,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_i16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x64,0x7d] + +v_cmpx_eq_i16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x64,0x7d] + +v_cmpx_eq_i16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x64,0x7d] + +v_cmpx_eq_i16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x64,0x7d] + +v_cmpx_eq_i16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x64,0x7d] + +v_cmpx_eq_i16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x64,0x7d] + +v_cmpx_eq_i16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x64,0x7d] + +v_cmpx_eq_i16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x64,0x7d] + +v_cmpx_eq_i16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x64,0x7d] + +v_cmpx_eq_i16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x64,0x7d] + +v_cmpx_eq_i16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x64,0x7d] + +v_cmpx_eq_i16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x64,0x7d] + +v_cmpx_eq_i16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x64,0x7d] + +v_cmpx_eq_i16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x64,0x7d] + +v_cmpx_eq_i16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x64,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_i32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x84,0x7d] + +v_cmpx_eq_i32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x84,0x7d] + +v_cmpx_eq_i32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x84,0x7d] + +v_cmpx_eq_i32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x84,0x7d] + +v_cmpx_eq_i32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x84,0x7d] + +v_cmpx_eq_i32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x84,0x7d] + +v_cmpx_eq_i32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x84,0x7d] + +v_cmpx_eq_i32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x84,0x7d] + +v_cmpx_eq_i32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x84,0x7d] + +v_cmpx_eq_i32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x84,0x7d] + +v_cmpx_eq_i32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x84,0x7d] + +v_cmpx_eq_i32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x84,0x7d] + +v_cmpx_eq_i32 0.5, v2 +// GFX12: encoding: 
[0xf0,0x04,0x84,0x7d] + +v_cmpx_eq_i32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x84,0x7d] + +v_cmpx_eq_i32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x85,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_i64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xa4,0x7d] + +v_cmpx_eq_i64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xa4,0x7d] + +v_cmpx_eq_i64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xa4,0x7d] + +v_cmpx_eq_i64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xa5,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_u16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x74,0x7d] + +v_cmpx_eq_u16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x74,0x7d] + +v_cmpx_eq_u16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x74,0x7d] + +v_cmpx_eq_u16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x74,0x7d] + +v_cmpx_eq_u16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x74,0x7d] + +v_cmpx_eq_u16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x74,0x7d] + +v_cmpx_eq_u16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x74,0x7d] + +v_cmpx_eq_u16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x74,0x7d] + +v_cmpx_eq_u16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x74,0x7d] + +v_cmpx_eq_u16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x74,0x7d] + +v_cmpx_eq_u16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x74,0x7d] + +v_cmpx_eq_u16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x74,0x7d] + +v_cmpx_eq_u16 0.5, v2 +// GFX12: encoding: 
[0xf0,0x04,0x74,0x7d] + +v_cmpx_eq_u16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x74,0x7d] + +v_cmpx_eq_u16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x74,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_eq_u32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x94,0x7d] + +v_cmpx_eq_u32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x94,0x7d] + +v_cmpx_eq_u32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x94,0x7d] + +v_cmpx_eq_u32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x94,0x7d] + +v_cmpx_eq_u32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x94,0x7d] + +v_cmpx_eq_u32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x94,0x7d] + +v_cmpx_eq_u32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x94,0x7d] + +v_cmpx_eq_u32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x94,0x7d] + +v_cmpx_eq_u32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x94,0x7d] + +v_cmpx_eq_u32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x94,0x7d] + +v_cmpx_eq_u32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x94,0x7d] + +v_cmpx_eq_u32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x94,0x7d] + +v_cmpx_eq_u32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x94,0x7d] + +v_cmpx_eq_u32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x94,0x7d] + +v_cmpx_eq_u32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x95,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_eq_u64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xb4,0x7d] + +v_cmpx_eq_u64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xb4,0x7d] + +v_cmpx_eq_u64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 0.5, v[2:3] +// GFX12: encoding: 
[0xf0,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xb4,0x7d] + +v_cmpx_eq_u64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xb5,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x0c,0x7d] + +v_cmpx_ge_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0c,0x7d] + +v_cmpx_ge_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0c,0x7d] + +v_cmpx_ge_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x0c,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x2c,0x7d] + +v_cmpx_ge_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x2c,0x7d] + +v_cmpx_ge_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 null, v2 +// GFX12: 
encoding: [0x7c,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x2c,0x7d] + +v_cmpx_ge_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x2d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x4c,0x7d] + +v_cmpx_ge_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x4c,0x7d] + +v_cmpx_ge_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x4c,0x7d] + +v_cmpx_ge_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x4d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_i16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x6c,0x7d] + +v_cmpx_ge_i16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x6c,0x7d] + +v_cmpx_ge_i16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 null, v2 +// GFX12: 
encoding: [0x7c,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x6c,0x7d] + +v_cmpx_ge_i16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x6c,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_i32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x8c,0x7d] + +v_cmpx_ge_i32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x8c,0x7d] + +v_cmpx_ge_i32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x8c,0x7d] + +v_cmpx_ge_i32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x8d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_i64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xac,0x7d] + +v_cmpx_ge_i64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xac,0x7d] + +v_cmpx_ge_i64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xac,0x7d] + +v_cmpx_ge_i64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xac,0x7d] + +v_cmpx_ge_i64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xac,0x7d] + +v_cmpx_ge_i64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xac,0x7d] + +v_cmpx_ge_i64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xac,0x7d] + +v_cmpx_ge_i64 null, v[2:3] +// GFX12: encoding: 
[0x7c,0x04,0xac,0x7d] + +v_cmpx_ge_i64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xac,0x7d] + +v_cmpx_ge_i64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xac,0x7d] + +v_cmpx_ge_i64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xac,0x7d] + +v_cmpx_ge_i64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xad,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_u16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x7c,0x7d] + +v_cmpx_ge_u16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x7c,0x7d] + +v_cmpx_ge_u16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x7c,0x7d] + +v_cmpx_ge_u16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x7c,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ge_u32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x9c,0x7d] + +v_cmpx_ge_u32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x9c,0x7d] + +v_cmpx_ge_u32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 exec_lo, v2 +// 
GFX12: encoding: [0x7e,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x9c,0x7d] + +v_cmpx_ge_u32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x9d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ge_u64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xbc,0x7d] + +v_cmpx_ge_u64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xbc,0x7d] + +v_cmpx_ge_u64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xbc,0x7d] + +v_cmpx_ge_u64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xbd,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x08,0x7d] + +v_cmpx_gt_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x08,0x7d] + +v_cmpx_gt_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x08,0x7d] + +v_cmpx_gt_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x08,0x7d] + +v_cmpx_gt_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x08,0x7d] + +v_cmpx_gt_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x08,0x7d] + +v_cmpx_gt_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x08,0x7d] + +v_cmpx_gt_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x08,0x7d] + +v_cmpx_gt_f16 exec_lo, v2 +// 
GFX12: encoding: [0x7e,0x04,0x08,0x7d] + +v_cmpx_gt_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x08,0x7d] + +v_cmpx_gt_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x08,0x7d] + +v_cmpx_gt_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x08,0x7d] + +v_cmpx_gt_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x08,0x7d] + +v_cmpx_gt_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x08,0x7d] + +v_cmpx_gt_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x08,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x28,0x7d] + +v_cmpx_gt_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x28,0x7d] + +v_cmpx_gt_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x28,0x7d] + +v_cmpx_gt_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x28,0x7d] + +v_cmpx_gt_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x28,0x7d] + +v_cmpx_gt_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x28,0x7d] + +v_cmpx_gt_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x28,0x7d] + +v_cmpx_gt_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x28,0x7d] + +v_cmpx_gt_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x28,0x7d] + +v_cmpx_gt_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x28,0x7d] + +v_cmpx_gt_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x28,0x7d] + +v_cmpx_gt_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x28,0x7d] + +v_cmpx_gt_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x28,0x7d] + +v_cmpx_gt_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x28,0x7d] + +v_cmpx_gt_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x29,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x48,0x7d] + +v_cmpx_gt_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x48,0x7d] + +v_cmpx_gt_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x48,0x7d] + +v_cmpx_gt_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x48,0x7d] + +v_cmpx_gt_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x48,0x7d] + +v_cmpx_gt_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: 
[0x7a,0x04,0x48,0x7d] + +v_cmpx_gt_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x48,0x7d] + +v_cmpx_gt_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x48,0x7d] + +v_cmpx_gt_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x48,0x7d] + +v_cmpx_gt_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x48,0x7d] + +v_cmpx_gt_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x48,0x7d] + +v_cmpx_gt_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x49,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_i16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x68,0x7d] + +v_cmpx_gt_i16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x68,0x7d] + +v_cmpx_gt_i16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x68,0x7d] + +v_cmpx_gt_i16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x68,0x7d] + +v_cmpx_gt_i16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x68,0x7d] + +v_cmpx_gt_i16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x68,0x7d] + +v_cmpx_gt_i16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x68,0x7d] + +v_cmpx_gt_i16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x68,0x7d] + +v_cmpx_gt_i16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x68,0x7d] + +v_cmpx_gt_i16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x68,0x7d] + +v_cmpx_gt_i16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x68,0x7d] + +v_cmpx_gt_i16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x68,0x7d] + +v_cmpx_gt_i16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x68,0x7d] + +v_cmpx_gt_i16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x68,0x7d] + +v_cmpx_gt_i16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x68,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_i32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x88,0x7d] + +v_cmpx_gt_i32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x88,0x7d] + +v_cmpx_gt_i32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x88,0x7d] + +v_cmpx_gt_i32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x88,0x7d] + +v_cmpx_gt_i32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x88,0x7d] + +v_cmpx_gt_i32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x88,0x7d] + +v_cmpx_gt_i32 ttmp15, v2 
+// GFX12: encoding: [0x7b,0x04,0x88,0x7d] + +v_cmpx_gt_i32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x88,0x7d] + +v_cmpx_gt_i32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x88,0x7d] + +v_cmpx_gt_i32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x88,0x7d] + +v_cmpx_gt_i32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x88,0x7d] + +v_cmpx_gt_i32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x88,0x7d] + +v_cmpx_gt_i32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x88,0x7d] + +v_cmpx_gt_i32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x88,0x7d] + +v_cmpx_gt_i32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x89,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_i64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xa8,0x7d] + +v_cmpx_gt_i64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xa8,0x7d] + +v_cmpx_gt_i64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xa8,0x7d] + +v_cmpx_gt_i64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xa9,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_u16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x78,0x7d] + +v_cmpx_gt_u16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x78,0x7d] + +v_cmpx_gt_u16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x78,0x7d] + +v_cmpx_gt_u16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x78,0x7d] + +v_cmpx_gt_u16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x78,0x7d] + +v_cmpx_gt_u16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x78,0x7d] + +v_cmpx_gt_u16 ttmp15, v2 
+// GFX12: encoding: [0x7b,0x04,0x78,0x7d] + +v_cmpx_gt_u16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x78,0x7d] + +v_cmpx_gt_u16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x78,0x7d] + +v_cmpx_gt_u16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x78,0x7d] + +v_cmpx_gt_u16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x78,0x7d] + +v_cmpx_gt_u16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x78,0x7d] + +v_cmpx_gt_u16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x78,0x7d] + +v_cmpx_gt_u16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x78,0x7d] + +v_cmpx_gt_u16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x78,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_gt_u32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x98,0x7d] + +v_cmpx_gt_u32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x98,0x7d] + +v_cmpx_gt_u32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x98,0x7d] + +v_cmpx_gt_u32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x98,0x7d] + +v_cmpx_gt_u32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x98,0x7d] + +v_cmpx_gt_u32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x98,0x7d] + +v_cmpx_gt_u32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x98,0x7d] + +v_cmpx_gt_u32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x98,0x7d] + +v_cmpx_gt_u32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x98,0x7d] + +v_cmpx_gt_u32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x98,0x7d] + +v_cmpx_gt_u32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x98,0x7d] + +v_cmpx_gt_u32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x98,0x7d] + +v_cmpx_gt_u32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x98,0x7d] + +v_cmpx_gt_u32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x98,0x7d] + +v_cmpx_gt_u32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x99,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_gt_u64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xb8,0x7d] + +v_cmpx_gt_u64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xb8,0x7d] + +v_cmpx_gt_u64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xb8,0x7d] 
+ +v_cmpx_gt_u64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xb8,0x7d] + +v_cmpx_gt_u64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xb9,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x06,0x7d] + +v_cmpx_le_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x06,0x7d] + +v_cmpx_le_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x06,0x7d] + +v_cmpx_le_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x06,0x7d] + +v_cmpx_le_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x06,0x7d] + +v_cmpx_le_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x06,0x7d] + +v_cmpx_le_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x06,0x7d] + +v_cmpx_le_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x06,0x7d] + +v_cmpx_le_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x06,0x7d] + +v_cmpx_le_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x06,0x7d] + +v_cmpx_le_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x06,0x7d] + +v_cmpx_le_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x06,0x7d] + +v_cmpx_le_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x06,0x7d] + +v_cmpx_le_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x06,0x7d] + +v_cmpx_le_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x06,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x26,0x7d] + +v_cmpx_le_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x26,0x7d] + +v_cmpx_le_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x26,0x7d] + +v_cmpx_le_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x26,0x7d] + +v_cmpx_le_f32 vcc_lo, v2 +// GFX12: 
encoding: [0x6a,0x04,0x26,0x7d] + +v_cmpx_le_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x26,0x7d] + +v_cmpx_le_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x26,0x7d] + +v_cmpx_le_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x26,0x7d] + +v_cmpx_le_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x26,0x7d] + +v_cmpx_le_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x26,0x7d] + +v_cmpx_le_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x26,0x7d] + +v_cmpx_le_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x26,0x7d] + +v_cmpx_le_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x26,0x7d] + +v_cmpx_le_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x26,0x7d] + +v_cmpx_le_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x27,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x46,0x7d] + +v_cmpx_le_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x46,0x7d] + +v_cmpx_le_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x46,0x7d] + +v_cmpx_le_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x46,0x7d] + +v_cmpx_le_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x46,0x7d] + +v_cmpx_le_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x46,0x7d] + +v_cmpx_le_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x46,0x7d] + +v_cmpx_le_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x46,0x7d] + +v_cmpx_le_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x46,0x7d] + +v_cmpx_le_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x46,0x7d] + +v_cmpx_le_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x46,0x7d] + +v_cmpx_le_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x47,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_i16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x66,0x7d] + +v_cmpx_le_i16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x66,0x7d] + +v_cmpx_le_i16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x66,0x7d] + +v_cmpx_le_i16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x66,0x7d] + +v_cmpx_le_i16 vcc_lo, v2 +// GFX12: 
encoding: [0x6a,0x04,0x66,0x7d] + +v_cmpx_le_i16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x66,0x7d] + +v_cmpx_le_i16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x66,0x7d] + +v_cmpx_le_i16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x66,0x7d] + +v_cmpx_le_i16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x66,0x7d] + +v_cmpx_le_i16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x66,0x7d] + +v_cmpx_le_i16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x66,0x7d] + +v_cmpx_le_i16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x66,0x7d] + +v_cmpx_le_i16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x66,0x7d] + +v_cmpx_le_i16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x66,0x7d] + +v_cmpx_le_i16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x66,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_i32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x86,0x7d] + +v_cmpx_le_i32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x86,0x7d] + +v_cmpx_le_i32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x86,0x7d] + +v_cmpx_le_i32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x86,0x7d] + +v_cmpx_le_i32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x86,0x7d] + +v_cmpx_le_i32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x86,0x7d] + +v_cmpx_le_i32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x86,0x7d] + +v_cmpx_le_i32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x86,0x7d] + +v_cmpx_le_i32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x86,0x7d] + +v_cmpx_le_i32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x86,0x7d] + +v_cmpx_le_i32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x86,0x7d] + +v_cmpx_le_i32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x86,0x7d] + +v_cmpx_le_i32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x86,0x7d] + +v_cmpx_le_i32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x86,0x7d] + +v_cmpx_le_i32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x87,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_i64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xa6,0x7d] + +v_cmpx_le_i64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xa6,0x7d] + +v_cmpx_le_i64 
s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xa6,0x7d] + +v_cmpx_le_i64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xa6,0x7d] + +v_cmpx_le_i64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xa6,0x7d] + +v_cmpx_le_i64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xa6,0x7d] + +v_cmpx_le_i64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xa6,0x7d] + +v_cmpx_le_i64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xa6,0x7d] + +v_cmpx_le_i64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xa6,0x7d] + +v_cmpx_le_i64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xa6,0x7d] + +v_cmpx_le_i64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xa6,0x7d] + +v_cmpx_le_i64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xa7,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_u16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x76,0x7d] + +v_cmpx_le_u16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x76,0x7d] + +v_cmpx_le_u16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x76,0x7d] + +v_cmpx_le_u16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x76,0x7d] + +v_cmpx_le_u16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x76,0x7d] + +v_cmpx_le_u16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x76,0x7d] + +v_cmpx_le_u16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x76,0x7d] + +v_cmpx_le_u16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x76,0x7d] + +v_cmpx_le_u16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x76,0x7d] + +v_cmpx_le_u16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x76,0x7d] + +v_cmpx_le_u16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x76,0x7d] + +v_cmpx_le_u16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x76,0x7d] + +v_cmpx_le_u16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x76,0x7d] + +v_cmpx_le_u16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x76,0x7d] + +v_cmpx_le_u16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x76,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_le_u32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x96,0x7d] + +v_cmpx_le_u32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x96,0x7d] + +v_cmpx_le_u32 s1, v2 +// GFX12: 
encoding: [0x01,0x04,0x96,0x7d] + +v_cmpx_le_u32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x96,0x7d] + +v_cmpx_le_u32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x96,0x7d] + +v_cmpx_le_u32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x96,0x7d] + +v_cmpx_le_u32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x96,0x7d] + +v_cmpx_le_u32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x96,0x7d] + +v_cmpx_le_u32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x96,0x7d] + +v_cmpx_le_u32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x96,0x7d] + +v_cmpx_le_u32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x96,0x7d] + +v_cmpx_le_u32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x96,0x7d] + +v_cmpx_le_u32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x96,0x7d] + +v_cmpx_le_u32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x96,0x7d] + +v_cmpx_le_u32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x97,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_le_u64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xb6,0x7d] + +v_cmpx_le_u64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xb6,0x7d] + +v_cmpx_le_u64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xb6,0x7d] + +v_cmpx_le_u64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xb6,0x7d] + +v_cmpx_le_u64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xb6,0x7d] + +v_cmpx_le_u64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xb6,0x7d] + +v_cmpx_le_u64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xb6,0x7d] + +v_cmpx_le_u64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xb6,0x7d] + +v_cmpx_le_u64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xb6,0x7d] + +v_cmpx_le_u64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xb6,0x7d] + +v_cmpx_le_u64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xb6,0x7d] + +v_cmpx_le_u64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xb7,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lg_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x7d] + +v_cmpx_lg_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x7d] + +v_cmpx_lg_f16 s1, v2 +// GFX12: 
encoding: [0x01,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x7d] + +v_cmpx_lg_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x0a,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lg_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x2a,0x7d] + +v_cmpx_lg_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x2a,0x7d] + +v_cmpx_lg_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x2a,0x7d] + +v_cmpx_lg_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x2b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lg_f64 v[1:2], v[2:3] +// 
GFX12: encoding: [0x01,0x05,0x4a,0x7d] + +v_cmpx_lg_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x4a,0x7d] + +v_cmpx_lg_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x4a,0x7d] + +v_cmpx_lg_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x4b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x02,0x7d] + +v_cmpx_lt_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x02,0x7d] + +v_cmpx_lt_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x02,0x7d] + +v_cmpx_lt_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x02,0x7d] + +v_cmpx_lt_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x02,0x7d] + +v_cmpx_lt_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x02,0x7d] + +v_cmpx_lt_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x02,0x7d] + +v_cmpx_lt_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x02,0x7d] + +v_cmpx_lt_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x02,0x7d] + +v_cmpx_lt_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x02,0x7d] + +v_cmpx_lt_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x02,0x7d] + +v_cmpx_lt_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x02,0x7d] + +v_cmpx_lt_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x02,0x7d] + +v_cmpx_lt_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x02,0x7d] + +v_cmpx_lt_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_f32 v1, v2 +// GFX12: 
encoding: [0x01,0x05,0x22,0x7d] + +v_cmpx_lt_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x22,0x7d] + +v_cmpx_lt_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x22,0x7d] + +v_cmpx_lt_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x22,0x7d] + +v_cmpx_lt_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x22,0x7d] + +v_cmpx_lt_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x22,0x7d] + +v_cmpx_lt_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x22,0x7d] + +v_cmpx_lt_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x22,0x7d] + +v_cmpx_lt_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x22,0x7d] + +v_cmpx_lt_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x22,0x7d] + +v_cmpx_lt_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x22,0x7d] + +v_cmpx_lt_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x22,0x7d] + +v_cmpx_lt_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x22,0x7d] + +v_cmpx_lt_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x22,0x7d] + +v_cmpx_lt_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x23,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x42,0x7d] + +v_cmpx_lt_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x42,0x7d] + +v_cmpx_lt_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x42,0x7d] + +v_cmpx_lt_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x42,0x7d] + +v_cmpx_lt_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x42,0x7d] + +v_cmpx_lt_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x42,0x7d] + +v_cmpx_lt_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x42,0x7d] + +v_cmpx_lt_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x42,0x7d] + +v_cmpx_lt_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x42,0x7d] + +v_cmpx_lt_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x42,0x7d] + +v_cmpx_lt_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x42,0x7d] + +v_cmpx_lt_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x43,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_i16 v1, v2 +// GFX12: 
encoding: [0x01,0x05,0x62,0x7d] + +v_cmpx_lt_i16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x62,0x7d] + +v_cmpx_lt_i16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x62,0x7d] + +v_cmpx_lt_i16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x62,0x7d] + +v_cmpx_lt_i16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x62,0x7d] + +v_cmpx_lt_i16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x62,0x7d] + +v_cmpx_lt_i16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x62,0x7d] + +v_cmpx_lt_i16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x62,0x7d] + +v_cmpx_lt_i16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x62,0x7d] + +v_cmpx_lt_i16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x62,0x7d] + +v_cmpx_lt_i16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x62,0x7d] + +v_cmpx_lt_i16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x62,0x7d] + +v_cmpx_lt_i16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x62,0x7d] + +v_cmpx_lt_i16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x62,0x7d] + +v_cmpx_lt_i16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x62,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_i32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x82,0x7d] + +v_cmpx_lt_i32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x82,0x7d] + +v_cmpx_lt_i32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x82,0x7d] + +v_cmpx_lt_i32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x82,0x7d] + +v_cmpx_lt_i32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x82,0x7d] + +v_cmpx_lt_i32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x82,0x7d] + +v_cmpx_lt_i32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x82,0x7d] + +v_cmpx_lt_i32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x82,0x7d] + +v_cmpx_lt_i32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x82,0x7d] + +v_cmpx_lt_i32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x82,0x7d] + +v_cmpx_lt_i32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x82,0x7d] + +v_cmpx_lt_i32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x82,0x7d] + +v_cmpx_lt_i32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x82,0x7d] + +v_cmpx_lt_i32 src_scc, v2 +// GFX12: encoding: 
[0xfd,0x04,0x82,0x7d] + +v_cmpx_lt_i32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x83,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_i64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xa2,0x7d] + +v_cmpx_lt_i64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xa2,0x7d] + +v_cmpx_lt_i64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xa2,0x7d] + +v_cmpx_lt_i64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xa3,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_u16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x72,0x7d] + +v_cmpx_lt_u16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x72,0x7d] + +v_cmpx_lt_u16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x72,0x7d] + +v_cmpx_lt_u16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x72,0x7d] + +v_cmpx_lt_u16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x72,0x7d] + +v_cmpx_lt_u16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x72,0x7d] + +v_cmpx_lt_u16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x72,0x7d] + +v_cmpx_lt_u16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x72,0x7d] + +v_cmpx_lt_u16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x72,0x7d] + +v_cmpx_lt_u16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x72,0x7d] + +v_cmpx_lt_u16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x72,0x7d] + +v_cmpx_lt_u16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x72,0x7d] + +v_cmpx_lt_u16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x72,0x7d] + +v_cmpx_lt_u16 src_scc, v2 +// GFX12: encoding: 
[0xfd,0x04,0x72,0x7d] + +v_cmpx_lt_u16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x72,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_lt_u32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x92,0x7d] + +v_cmpx_lt_u32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x92,0x7d] + +v_cmpx_lt_u32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x92,0x7d] + +v_cmpx_lt_u32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x92,0x7d] + +v_cmpx_lt_u32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x92,0x7d] + +v_cmpx_lt_u32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x92,0x7d] + +v_cmpx_lt_u32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x92,0x7d] + +v_cmpx_lt_u32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x92,0x7d] + +v_cmpx_lt_u32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x92,0x7d] + +v_cmpx_lt_u32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x92,0x7d] + +v_cmpx_lt_u32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x92,0x7d] + +v_cmpx_lt_u32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x92,0x7d] + +v_cmpx_lt_u32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x92,0x7d] + +v_cmpx_lt_u32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x92,0x7d] + +v_cmpx_lt_u32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x93,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_lt_u64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xb2,0x7d] + +v_cmpx_lt_u64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xb2,0x7d] + +v_cmpx_lt_u64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 src_scc, v[2:3] +// GFX12: encoding: 
[0xfd,0x04,0xb2,0x7d] + +v_cmpx_lt_u64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xb3,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_i16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x6a,0x7d] + +v_cmpx_ne_i16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x6a,0x7d] + +v_cmpx_ne_i16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x6a,0x7d] + +v_cmpx_ne_i16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x6a,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ne_i32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x8a,0x7d] + +v_cmpx_ne_i32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x8a,0x7d] + +v_cmpx_ne_i32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 -1, v2 +// GFX12: encoding: 
[0xc1,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x8a,0x7d] + +v_cmpx_ne_i32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x8b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_i64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xaa,0x7d] + +v_cmpx_ne_i64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xaa,0x7d] + +v_cmpx_ne_i64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xaa,0x7d] + +v_cmpx_ne_i64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xab,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_u16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x7a,0x7d] + +v_cmpx_ne_u16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x7a,0x7d] + +v_cmpx_ne_u16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 -1, v2 +// GFX12: encoding: 
[0xc1,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x7a,0x7d] + +v_cmpx_ne_u16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x7a,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ne_u32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x9a,0x7d] + +v_cmpx_ne_u32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x9a,0x7d] + +v_cmpx_ne_u32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x9a,0x7d] + +v_cmpx_ne_u32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x9b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ne_u64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0xba,0x7d] + +v_cmpx_ne_u64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0xba,0x7d] + +v_cmpx_ne_u64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0xba,0x7d] + +v_cmpx_ne_u64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0xba,0x7d] + +v_cmpx_ne_u64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0xba,0x7d] + +v_cmpx_ne_u64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0xba,0x7d] + +v_cmpx_ne_u64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0xba,0x7d] + +v_cmpx_ne_u64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0xba,0x7d] + +v_cmpx_ne_u64 -1, v[2:3] +// GFX12: encoding: 
[0xc1,0x04,0xba,0x7d] + +v_cmpx_ne_u64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0xba,0x7d] + +v_cmpx_ne_u64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0xba,0x7d] + +v_cmpx_ne_u64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xbb,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_neq_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x1a,0x7d] + +v_cmpx_neq_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x1a,0x7d] + +v_cmpx_neq_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x1a,0x7d] + +v_cmpx_neq_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x1a,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_neq_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x3a,0x7d] + +v_cmpx_neq_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x3a,0x7d] + +v_cmpx_neq_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x3a,0x7d] + 
+v_cmpx_neq_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x3a,0x7d] + +v_cmpx_neq_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x3b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_neq_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x5a,0x7d] + +v_cmpx_neq_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x5a,0x7d] + +v_cmpx_neq_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x5a,0x7d] + +v_cmpx_neq_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x5b,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nge_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x12,0x7d] + +v_cmpx_nge_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x12,0x7d] + +v_cmpx_nge_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x12,0x7d] + +v_cmpx_nge_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x12,0x7d] + +v_cmpx_nge_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x12,0x7d] + +v_cmpx_nge_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x12,0x7d] + +v_cmpx_nge_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x12,0x7d] + +v_cmpx_nge_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x12,0x7d] + +v_cmpx_nge_f16 exec_lo, v2 +// GFX12: encoding: 
[0x7e,0x04,0x12,0x7d] + +v_cmpx_nge_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x12,0x7d] + +v_cmpx_nge_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x12,0x7d] + +v_cmpx_nge_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x12,0x7d] + +v_cmpx_nge_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x12,0x7d] + +v_cmpx_nge_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x12,0x7d] + +v_cmpx_nge_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x12,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_nge_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x32,0x7d] + +v_cmpx_nge_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x32,0x7d] + +v_cmpx_nge_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x32,0x7d] + +v_cmpx_nge_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x32,0x7d] + +v_cmpx_nge_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x32,0x7d] + +v_cmpx_nge_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x32,0x7d] + +v_cmpx_nge_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x32,0x7d] + +v_cmpx_nge_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x32,0x7d] + +v_cmpx_nge_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x32,0x7d] + +v_cmpx_nge_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x32,0x7d] + +v_cmpx_nge_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x32,0x7d] + +v_cmpx_nge_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x32,0x7d] + +v_cmpx_nge_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x32,0x7d] + +v_cmpx_nge_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x32,0x7d] + +v_cmpx_nge_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x33,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nge_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x52,0x7d] + +v_cmpx_nge_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x52,0x7d] + +v_cmpx_nge_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x52,0x7d] + +v_cmpx_nge_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x52,0x7d] + +v_cmpx_nge_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x52,0x7d] + +v_cmpx_nge_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: 
[0x7a,0x04,0x52,0x7d] + +v_cmpx_nge_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x52,0x7d] + +v_cmpx_nge_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x52,0x7d] + +v_cmpx_nge_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x52,0x7d] + +v_cmpx_nge_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x52,0x7d] + +v_cmpx_nge_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x52,0x7d] + +v_cmpx_nge_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x53,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ngt_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x16,0x7d] + +v_cmpx_ngt_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x16,0x7d] + +v_cmpx_ngt_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x16,0x7d] + +v_cmpx_ngt_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x16,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_ngt_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x36,0x7d] + +v_cmpx_ngt_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x36,0x7d] + +v_cmpx_ngt_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x36,0x7d] + 
+v_cmpx_ngt_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x36,0x7d] + +v_cmpx_ngt_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x37,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_ngt_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x56,0x7d] + +v_cmpx_ngt_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x56,0x7d] + +v_cmpx_ngt_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x56,0x7d] + +v_cmpx_ngt_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x57,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nle_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x18,0x7d] + +v_cmpx_nle_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x18,0x7d] + +v_cmpx_nle_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x18,0x7d] + +v_cmpx_nle_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x18,0x7d] + +v_cmpx_nle_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x18,0x7d] + +v_cmpx_nle_f16 vcc_hi, v2 +// GFX12: encoding: 
[0x6b,0x04,0x18,0x7d] + +v_cmpx_nle_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x18,0x7d] + +v_cmpx_nle_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x18,0x7d] + +v_cmpx_nle_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x18,0x7d] + +v_cmpx_nle_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x18,0x7d] + +v_cmpx_nle_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x18,0x7d] + +v_cmpx_nle_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x18,0x7d] + +v_cmpx_nle_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x18,0x7d] + +v_cmpx_nle_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x18,0x7d] + +v_cmpx_nle_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x18,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_nle_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x38,0x7d] + +v_cmpx_nle_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x38,0x7d] + +v_cmpx_nle_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x38,0x7d] + +v_cmpx_nle_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x38,0x7d] + +v_cmpx_nle_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x38,0x7d] + +v_cmpx_nle_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x38,0x7d] + +v_cmpx_nle_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x38,0x7d] + +v_cmpx_nle_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x38,0x7d] + +v_cmpx_nle_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x38,0x7d] + +v_cmpx_nle_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x38,0x7d] + +v_cmpx_nle_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x38,0x7d] + +v_cmpx_nle_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x38,0x7d] + +v_cmpx_nle_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x38,0x7d] + +v_cmpx_nle_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x38,0x7d] + +v_cmpx_nle_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x39,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nle_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x58,0x7d] + +v_cmpx_nle_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x58,0x7d] + +v_cmpx_nle_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x58,0x7d] + 
+v_cmpx_nle_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x58,0x7d] + +v_cmpx_nle_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x58,0x7d] + +v_cmpx_nle_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x58,0x7d] + +v_cmpx_nle_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x58,0x7d] + +v_cmpx_nle_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x58,0x7d] + +v_cmpx_nle_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x58,0x7d] + +v_cmpx_nle_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x58,0x7d] + +v_cmpx_nle_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x58,0x7d] + +v_cmpx_nle_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x59,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nlg_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x14,0x7d] + +v_cmpx_nlg_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x14,0x7d] + +v_cmpx_nlg_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x14,0x7d] + +v_cmpx_nlg_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x14,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_nlg_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x34,0x7d] + +v_cmpx_nlg_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x34,0x7d] + +v_cmpx_nlg_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x34,0x7d] + 
+v_cmpx_nlg_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x34,0x7d] + +v_cmpx_nlg_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x35,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nlg_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x54,0x7d] + +v_cmpx_nlg_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x54,0x7d] + +v_cmpx_nlg_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x54,0x7d] + +v_cmpx_nlg_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x55,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nlt_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x1c,0x7d] + +v_cmpx_nlt_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x1c,0x7d] + +v_cmpx_nlt_f16 s1, v2 +// GFX12: encoding: 
[0x01,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x1c,0x7d] + +v_cmpx_nlt_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x1c,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_nlt_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x3c,0x7d] + +v_cmpx_nlt_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x3c,0x7d] + +v_cmpx_nlt_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x3c,0x7d] + +v_cmpx_nlt_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x3d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_nlt_f64 
v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x5c,0x7d] + +v_cmpx_nlt_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x5c,0x7d] + +v_cmpx_nlt_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x5c,0x7d] + +v_cmpx_nlt_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x5d,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_o_f16 v1, v2 +// GFX12: encoding: [0x01,0x05,0x0e,0x7d] + +v_cmpx_o_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0e,0x7d] + +v_cmpx_o_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x0e,0x7d] + +v_cmpx_o_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x0e,0x7d] + +v_cmpx_o_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0e,0x7d] + +v_cmpx_o_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0e,0x7d] + +v_cmpx_o_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0e,0x7d] + +v_cmpx_o_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0e,0x7d] + +v_cmpx_o_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0e,0x7d] + +v_cmpx_o_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0e,0x7d] + +v_cmpx_o_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x0e,0x7d] + +v_cmpx_o_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0e,0x7d] + +v_cmpx_o_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0e,0x7d] + +v_cmpx_o_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0e,0x7d] + +v_cmpx_o_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x0e,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_o_f32 v1, v2 +// 
GFX12: encoding: [0x01,0x05,0x2e,0x7d] + +v_cmpx_o_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x2e,0x7d] + +v_cmpx_o_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x2e,0x7d] + +v_cmpx_o_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x2e,0x7d] + +v_cmpx_o_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x2e,0x7d] + +v_cmpx_o_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x2e,0x7d] + +v_cmpx_o_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x2e,0x7d] + +v_cmpx_o_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x2e,0x7d] + +v_cmpx_o_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x2e,0x7d] + +v_cmpx_o_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x2e,0x7d] + +v_cmpx_o_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x2e,0x7d] + +v_cmpx_o_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x2e,0x7d] + +v_cmpx_o_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x2e,0x7d] + +v_cmpx_o_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x2e,0x7d] + +v_cmpx_o_f32 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0x2f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_o_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x4e,0x7d] + +v_cmpx_o_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x4e,0x7d] + +v_cmpx_o_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x4e,0x7d] + +v_cmpx_o_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x4e,0x7d] + +v_cmpx_o_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x4e,0x7d] + +v_cmpx_o_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x4e,0x7d] + +v_cmpx_o_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x4e,0x7d] + +v_cmpx_o_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x4e,0x7d] + +v_cmpx_o_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x4e,0x7d] + +v_cmpx_o_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x4e,0x7d] + +v_cmpx_o_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x4e,0x7d] + +v_cmpx_o_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x4f,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_u_f16 v1, v2 +// GFX12: encoding: 
[0x01,0x05,0x10,0x7d] + +v_cmpx_u_f16 v127, v2 +// GFX12: encoding: [0x7f,0x05,0x10,0x7d] + +v_cmpx_u_f16 s1, v2 +// GFX12: encoding: [0x01,0x04,0x10,0x7d] + +v_cmpx_u_f16 s105, v2 +// GFX12: encoding: [0x69,0x04,0x10,0x7d] + +v_cmpx_u_f16 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x10,0x7d] + +v_cmpx_u_f16 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x10,0x7d] + +v_cmpx_u_f16 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x10,0x7d] + +v_cmpx_u_f16 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x10,0x7d] + +v_cmpx_u_f16 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x10,0x7d] + +v_cmpx_u_f16 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x10,0x7d] + +v_cmpx_u_f16 null, v2 +// GFX12: encoding: [0x7c,0x04,0x10,0x7d] + +v_cmpx_u_f16 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x10,0x7d] + +v_cmpx_u_f16 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x10,0x7d] + +v_cmpx_u_f16 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x10,0x7d] + +v_cmpx_u_f16 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0x10,0x7d,0x0b,0xfe,0x00,0x00] + +v_cmpx_u_f32 v1, v2 +// GFX12: encoding: [0x01,0x05,0x30,0x7d] + +v_cmpx_u_f32 v255, v2 +// GFX12: encoding: [0xff,0x05,0x30,0x7d] + +v_cmpx_u_f32 s1, v2 +// GFX12: encoding: [0x01,0x04,0x30,0x7d] + +v_cmpx_u_f32 s105, v2 +// GFX12: encoding: [0x69,0x04,0x30,0x7d] + +v_cmpx_u_f32 vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x30,0x7d] + +v_cmpx_u_f32 vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x30,0x7d] + +v_cmpx_u_f32 ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x30,0x7d] + +v_cmpx_u_f32 m0, v2 +// GFX12: encoding: [0x7d,0x04,0x30,0x7d] + +v_cmpx_u_f32 exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x30,0x7d] + +v_cmpx_u_f32 exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x30,0x7d] + +v_cmpx_u_f32 null, v2 +// GFX12: encoding: [0x7c,0x04,0x30,0x7d] + +v_cmpx_u_f32 -1, v2 +// GFX12: encoding: [0xc1,0x04,0x30,0x7d] + +v_cmpx_u_f32 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x30,0x7d] + +v_cmpx_u_f32 src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x30,0x7d] + +v_cmpx_u_f32 0xaf123456, v255 
+// GFX12: encoding: [0xff,0xfe,0x31,0x7d,0x56,0x34,0x12,0xaf] + +v_cmpx_u_f64 v[1:2], v[2:3] +// GFX12: encoding: [0x01,0x05,0x50,0x7d] + +v_cmpx_u_f64 v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x50,0x7d] + +v_cmpx_u_f64 s[2:3], v[2:3] +// GFX12: encoding: [0x02,0x04,0x50,0x7d] + +v_cmpx_u_f64 s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x50,0x7d] + +v_cmpx_u_f64 vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x50,0x7d] + +v_cmpx_u_f64 ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x50,0x7d] + +v_cmpx_u_f64 exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x50,0x7d] + +v_cmpx_u_f64 null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x50,0x7d] + +v_cmpx_u_f64 -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x50,0x7d] + +v_cmpx_u_f64 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x50,0x7d] + +v_cmpx_u_f64 src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x50,0x7d] + +v_cmpx_u_f64 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0x51,0x7d,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s index 4c5a8e638e3fbe5..8c01cf4fbce20e2 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s v_cmpx_class_f16_e32 v1, v2 // GFX12: encoding: [0x01,0x05,0xfa,0x7d] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16-fake16.s new file mode 100644 index 000000000000000..d4e8069f87984ca 
--- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16-fake16.s @@ -0,0 +1,2270 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_class_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_class_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_class_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_class_f16 -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfa,0x7d,0x7f,0x6f,0x35,0x30] + +v_cmpx_class_f32 v1, v2 
quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_class_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_class_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_class_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_class_f32 -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfd,0x7d,0xff,0x6f,0x35,0x30] + +v_cmpx_eq_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_f16 v1, 
v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x04,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_eq_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_ror:1 +// GFX12: encoding: 
[0xfa,0x04,0x24,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x25,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_eq_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_i16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_i16 v1, v2 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x64,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_eq_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_i32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x85,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_eq_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: 
[0xfa,0x04,0x74,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_u16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x74,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_eq_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_eq_u32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x0f,0x01,0xff] + 
+v_cmpx_eq_u32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_eq_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_eq_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_eq_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x95,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf 
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x0c,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_ge_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: 
[0xfa,0xfe,0x2d,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_ge_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_i16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x6c,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_ge_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_i32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_half_mirror +// GFX12: encoding: 
[0xfa,0x04,0x8c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x8d,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_ge_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_u16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_u16 v1, 
v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x7c,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_ge_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ge_u32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ge_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ge_u32 v1, v2 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ge_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x9d,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x08,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_gt_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: 
[0xfa,0x04,0x28,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x29,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_gt_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_i16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x0f,0x01,0xff] + 
+v_cmpx_gt_i16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x68,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_gt_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_i32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf 
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x89,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_gt_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_u16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: 
[0xfa,0xfe,0x78,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_gt_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_gt_u32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_gt_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_gt_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_gt_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x99,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_le_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_half_mirror +// GFX12: encoding: 
[0xfa,0x04,0x06,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x06,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_le_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x1f,0x01,0xff] + 
+v_cmpx_le_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x27,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_le_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_i16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x5f,0x01,0x01] + 
+v_cmpx_le_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x66,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_le_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_i32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x87,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_le_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: 
encoding: [0xfa,0x04,0x76,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_u16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x76,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_le_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_le_u32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x0f,0x01,0xff] + 
+v_cmpx_le_u32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_le_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_le_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_le_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x97,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_lg_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lg_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf 
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lg_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lg_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lg_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x0a,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_lg_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lg_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lg_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: 
[0xfa,0xfe,0x2b,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_half_mirror +// GFX12: encoding: 
[0xfa,0x04,0x22,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x23,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_lt_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_i16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x1f,0x01,0xff] + 
+v_cmpx_lt_i16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x62,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_lt_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_i32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x5f,0x01,0x01] + 
+v_cmpx_lt_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x83,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_lt_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_u16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x72,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_lt_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: 
encoding: [0xfa,0x04,0x92,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_lt_u32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_lt_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_lt_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_lt_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x93,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_ne_i16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_i16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x0f,0x01,0xff] + 
+v_cmpx_ne_i16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ne_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ne_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x6a,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_ne_i32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_i32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf 
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ne_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ne_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x8b,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_ne_u16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_u16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ne_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ne_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: 
[0xfa,0xfe,0x7a,0x7d,0x7f,0x6f,0x05,0x30] + +v_cmpx_ne_u32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ne_u32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ne_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ne_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ne_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x9b,0x7d,0xff,0x6f,0x05,0x30] + +v_cmpx_neq_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_neq_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_half_mirror +// GFX12: encoding: 
[0xfa,0x04,0x1a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_neq_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_neq_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_neq_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x1a,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_neq_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_neq_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_shr:15 +// GFX12: encoding: 
[0xfa,0x04,0x3a,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_neq_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_neq_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_neq_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x3b,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_nge_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nge_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nge_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nge_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nge_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x12,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_nge_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nge_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nge_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nge_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nge_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x33,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_ngt_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: 
[0xfa,0x04,0x16,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ngt_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ngt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ngt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ngt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x16,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_ngt_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_ngt_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_shl:1 +// GFX12: encoding: 
[0xfa,0x04,0x36,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_ngt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_ngt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_ngt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x37,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_nle_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nle_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_ror:1 +// GFX12: encoding: 
[0xfa,0x04,0x18,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nle_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nle_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nle_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x18,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_nle_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nle_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nle_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nle_f32 v1, v2 row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nle_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x39,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_nlg_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nlg_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nlg_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nlg_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nlg_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x14,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_nlg_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: 
encoding: [0xfa,0x04,0x34,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nlg_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nlg_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nlg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nlg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x35,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_nlt_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nlt_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_shl:15 +// GFX12: encoding: 
[0xfa,0x04,0x1c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nlt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nlt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nlt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x1c,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_nlt_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_nlt_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_ror:15 +// GFX12: encoding: 
[0xfa,0x04,0x3c,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_nlt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_nlt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_nlt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x3d,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_o_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_o_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_o_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_o_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_o_f16 
-|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x0e,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_o_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_o_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_o_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_o_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_o_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x2f,0x7d,0xff,0x6f,0xf5,0x30] + +v_cmpx_u_f16 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f16 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_u_f16 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x40,0x01,0xff] + 
+v_cmpx_u_f16 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_u_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_u_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_u_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x10,0x7d,0x7f,0x6f,0xf5,0x30] + +v_cmpx_u_f32 v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f32 v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0xe4,0x00,0xff] + +v_cmpx_u_f32 v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x40,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x41,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x01,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x0f,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x11,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_shr:15 +// GFX12: encoding: 
[0xfa,0x04,0x30,0x7d,0x01,0x1f,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x21,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x2f,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x50,0x01,0xff] + +v_cmpx_u_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x5f,0x01,0x01] + +v_cmpx_u_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x60,0x09,0x13] + +v_cmpx_u_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0x31,0x7d,0xff,0x6f,0xf5,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s index 9c29f5bcd714b1f..2dc2ecfbe9ba73f 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8-fake16.s new file mode 100644 index 000000000000000..067a1e2a65d20d8 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8-fake16.s @@ -0,0 +1,488 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 
-mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfa,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfd,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_eq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x04,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x04,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x04,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_eq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x24,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x24,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x25,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_eq_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x64,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x64,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x64,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_eq_i32 v1, v2 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x84,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x84,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x85,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_eq_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x74,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x74,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x74,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_eq_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x94,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x94,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x95,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x0c,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x2c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x2c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x2d,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ge_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x6c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x6c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: 
[0xe9,0xfe,0x6c,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ge_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x8c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x8c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x8d,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ge_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x7c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x7c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x7c,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ge_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x9c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x9c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x9d,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_gt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x08,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x08,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x08,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_gt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x28,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x28,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x29,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_gt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x68,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x68,0x7d,0x01,0x77,0x39,0x05] + 
+v_cmpx_gt_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x68,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_gt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x88,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x88,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x89,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_gt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x78,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x78,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x78,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_gt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x98,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x98,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x99,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_le_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x06,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x06,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x06,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_le_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x26,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x26,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x27,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_le_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x66,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 
+// GFX12: encoding: [0xea,0x04,0x66,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x66,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_le_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x86,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x86,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x87,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_le_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x76,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x76,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x76,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_le_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x96,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x96,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x97,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_lg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x0a,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x2a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x2a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x2b,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: 
[0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x02,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x22,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x23,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_lt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x62,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x62,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x62,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x82,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x82,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x83,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_lt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x72,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x72,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x72,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_lt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x92,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x92,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x93,0x7d,0xff,0x00,0x00,0x00] + 
+v_cmpx_ne_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x6a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x6a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x6a,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ne_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x8a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x8a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x8b,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ne_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x7a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x7a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x7a,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ne_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x9a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x9a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x9b,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_neq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x1a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x1a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x1a,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_neq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x3a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x3a,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] 
fi:0 +// GFX12: encoding: [0xe9,0xfe,0x3b,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_nge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x12,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x12,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x12,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_nge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x32,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x32,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x33,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_ngt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x16,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x16,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x16,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_ngt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x36,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x36,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x37,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_nle_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x18,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x18,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x18,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_nle_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x38,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: 
[0xea,0x04,0x38,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x39,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_nlg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x14,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x14,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x14,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_nlg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x34,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x34,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x35,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_nlt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x1c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x1c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x1c,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_nlt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x3c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x3c,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x3d,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_o_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x0e,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_o_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x2e,0x7d,0x01,0x77,0x39,0x05] 
+ +v_cmpx_o_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x2e,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x2f,0x7d,0xff,0x00,0x00,0x00] + +v_cmpx_u_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x10,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x10,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x10,0x7d,0x7f,0x00,0x00,0x00] + +v_cmpx_u_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x30,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x30,0x7d,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0x31,0x7d,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s index 0f82932a9e34bb0..a679d693a595f25 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s @@ -1,5 +1,5 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s index 8d6bff1521010f9..5019324d174b873 100644 --- 
a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s @@ -1,487 +1,488 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s v_cmpx_class_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmpx_eq_i16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_cmpx_eq_u16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_class_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands 
are not valid for this GPU or mode +v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_i16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v1, 
v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ngt_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: 
error: invalid operand for instruction -v_cmpx_o_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_u_f16_e32 v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_class_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_u16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v1, 
v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_i16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are 
not valid for this GPU or mode +v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ngt_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_i16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v255, 
v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_o_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_u_f16_e32 v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_cmpx_gt_i16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v255, v2 
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction v_cmpx_le_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: 
:[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v1, v255 
quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_f16_e32 
v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction 
+v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or 
mode -v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_lt_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction +v_cmpx_ne_u16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0] 
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction 
-v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmpx_ngt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction 
+v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands 
are not valid for this GPU or mode -v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction -v_cmpx_ne_i16_e32 v255, v2 
dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_u_f16_e32 v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction +v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_cmpx_u_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction + +v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s index 9d7b60b917306df..4f462861e3a0b05 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s @@ -1,487 +1,488 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 %s v_cmpx_class_f16 v1, v255 -// GFX12: v_cmpx_class_f16_e64 +// GFX12: v_cmpx_class_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_f16 v1, v255 -// GFX12: v_cmpx_eq_f16_e64 - -v_cmpx_eq_i16 v1, v255 -// GFX12: v_cmpx_eq_i16_e64 - -v_cmpx_eq_u16 v1, v255 -// GFX12: v_cmpx_eq_u16_e64 +v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_class_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_ge_f16 v1, v255 -// GFX12: v_cmpx_ge_f16_e64 +v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_class_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ge_i16 
v1, v255 -// GFX12: v_cmpx_ge_i16_e64 +v_cmpx_class_f16 v255, v2 +// GFX12: v_cmpx_class_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ge_u16 v1, v255 -// GFX12: v_cmpx_ge_u16_e64 +v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_class_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_gt_f16 v1, v255 -// GFX12: v_cmpx_gt_f16_e64 +v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_class_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_gt_i16 v1, v255 -// GFX12: v_cmpx_gt_i16_e64 +v_cmpx_eq_f16 v1, v255 +// GFX12: v_cmpx_eq_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_gt_u16 v1, v255 -// GFX12: v_cmpx_gt_u16_e64 +v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_le_f16 v1, v255 -// GFX12: v_cmpx_le_f16_e64 +v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_le_i16 v1, v255 -// GFX12: v_cmpx_le_i16_e64 +v_cmpx_eq_f16 v255, v2 +// GFX12: v_cmpx_eq_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x82,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_le_u16 v1, v255 -// GFX12: v_cmpx_le_u16_e64 +v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lg_f16 v1, v255 -// GFX12: v_cmpx_lg_f16_e64 +v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v255 -// GFX12: v_cmpx_lt_f16_e64 +v_cmpx_eq_i16 v1, v255 +// GFX12: v_cmpx_eq_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lt_i16 v1, v255 -// GFX12: v_cmpx_lt_i16_e64 +v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_u16 v1, v255 -// GFX12: v_cmpx_lt_u16_e64 +v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ne_i16 v1, v255 -// GFX12: v_cmpx_ne_i16_e64 +v_cmpx_eq_i16 v255, v2 +// GFX12: v_cmpx_eq_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ne_u16 v1, v255 -// GFX12: v_cmpx_ne_u16_e64 +v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_neq_f16 v1, v255 -// GFX12: v_cmpx_neq_f16_e64 +v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nge_f16 v1, v255 -// GFX12: v_cmpx_nge_f16_e64 +v_cmpx_eq_u16 v1, v255 +// GFX12: v_cmpx_eq_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ngt_f16 v1, v255 -// GFX12: v_cmpx_ngt_f16_e64 +v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_nle_f16 v1, v255 -// GFX12: v_cmpx_nle_f16_e64 +v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v1, v255 -// GFX12: v_cmpx_nlg_f16_e64 +v_cmpx_eq_u16 v255, v2 +// GFX12: v_cmpx_eq_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_nlt_f16 v1, v255 -// GFX12: v_cmpx_nlt_f16_e64 +v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_eq_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_o_f16 v1, v255 -// GFX12: v_cmpx_o_f16_e64 +v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_eq_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_u_f16 v1, v255 -// GFX12: v_cmpx_u_f16_e64 +v_cmpx_ge_f16 v1, v255 +// GFX12: v_cmpx_ge_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_class_f16 v255, v2 -// GFX12: v_cmpx_class_f16_e64 +v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_f16 v255, v2 -// GFX12: v_cmpx_eq_f16_e64 +v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_eq_i16 v255, v2 -// GFX12: v_cmpx_eq_i16_e64 +v_cmpx_ge_f16 v255, v2 +// GFX12: v_cmpx_ge_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x86,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_eq_u16 v255, v2 -// GFX12: v_cmpx_eq_u16_e64 +v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_f16 v255, v2 -// GFX12: v_cmpx_ge_f16_e64 +v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_i16 v255, v2 -// GFX12: v_cmpx_ge_i16_e64 +v_cmpx_ge_i16 v1, v255 +// GFX12: v_cmpx_ge_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ge_u16 v255, v2 -// GFX12: v_cmpx_ge_u16_e64 +v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_f16 v255, v2 -// GFX12: v_cmpx_gt_f16_e64 +v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_i16 v255, v2 -// GFX12: v_cmpx_gt_i16_e64 +v_cmpx_ge_i16 v255, v2 +// GFX12: v_cmpx_ge_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_gt_u16 v255, v2 -// GFX12: v_cmpx_gt_u16_e64 +v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_f16 v255, v2 -// GFX12: v_cmpx_le_f16_e64 +v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_i16 v255, v2 -// GFX12: v_cmpx_le_i16_e64 +v_cmpx_ge_u16 v1, v255 +// GFX12: v_cmpx_ge_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_le_u16 v255, v2 -// GFX12: v_cmpx_le_u16_e64 +v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lg_f16 v255, v2 -// GFX12: v_cmpx_lg_f16_e64 +v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v255, v2 -// GFX12: v_cmpx_lt_f16_e64 +v_cmpx_ge_u16 v255, v2 +// GFX12: v_cmpx_ge_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_i16 v255, v2 -// GFX12: v_cmpx_lt_i16_e64 +v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ge_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lt_u16 v255, v2 -// GFX12: v_cmpx_lt_u16_e64 +v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ge_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_i16 v255, v2 -// GFX12: v_cmpx_ne_i16_e64 +v_cmpx_gt_f16 v1, v255 +// GFX12: v_cmpx_gt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ne_u16 v255, v2 -// GFX12: v_cmpx_ne_u16_e64 +v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_neq_f16 v255, v2 -// GFX12: v_cmpx_neq_f16_e64 +v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nge_f16 v255, v2 -// GFX12: v_cmpx_nge_f16_e64 +v_cmpx_gt_f16 v255, v2 +// GFX12: v_cmpx_gt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x84,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ngt_f16 v255, v2 -// GFX12: v_cmpx_ngt_f16_e64 +v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nle_f16 v255, v2 -// GFX12: v_cmpx_nle_f16_e64 +v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_f16_e64_dpp v255, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v255, v2 -// GFX12: v_cmpx_nlg_f16_e64 +v_cmpx_gt_i16 v1, v255 +// GFX12: v_cmpx_gt_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_nlt_f16 v255, v2 -// GFX12: v_cmpx_nlt_f16_e64 +v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_o_f16 v255, v2 -// GFX12: v_cmpx_o_f16_e64 +v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_u_f16 v255, v2 -// GFX12: v_cmpx_u_f16_e64 +v_cmpx_gt_i16 v255, v2 +// GFX12: v_cmpx_gt_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_class_f16_e64 +v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_eq_f16_e64 +v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_eq_i16_e64 +v_cmpx_gt_u16 v1, v255 +// GFX12: v_cmpx_gt_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_eq_u16_e64 +v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0] -// 
GFX12: v_cmpx_ge_f16_e64 +v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ge_i16_e64 +v_cmpx_gt_u16 v255, v2 +// GFX12: v_cmpx_gt_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ge_u16_e64 +v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_gt_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_gt_f16_e64 +v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_gt_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_gt_i16_e64 +v_cmpx_le_f16 v1, v255 +// GFX12: v_cmpx_le_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_gt_u16_e64 +v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] v_cmpx_le_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_le_f16_e64 +// GFX12: v_cmpx_le_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_le_i16_e64 +v_cmpx_le_f16 v255, v2 +// GFX12: v_cmpx_le_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x83,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_le_u16_e64 +v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_f16_e64_dpp v255, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lg_f16_e64 +v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64 +v_cmpx_le_i16 v1, v255 +// GFX12: v_cmpx_le_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_i16_e64 +v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_u16_e64 +v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ne_i16_e64 +v_cmpx_le_i16 v255, v2 +// GFX12: v_cmpx_le_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ne_u16_e64 +v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_neq_f16_e64 +v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_nge_f16_e64 +v_cmpx_le_u16 v1, v255 +// GFX12: v_cmpx_le_u16_e64 v1, v255 ; encoding: 
[0x7e,0x00,0xbb,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ngt_f16_e64 +v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_nle_f16_e64 +v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_nlg_f16_e64 +v_cmpx_le_u16 v255, v2 +// GFX12: v_cmpx_le_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_nlt_f16_e64 +v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_le_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_o_f16_e64 +v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_le_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_u_f16_e64 +v_cmpx_lg_f16 v1, v255 +// GFX12: v_cmpx_lg_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_class_f16_e64 +v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_eq_f16_e64 +v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x85,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_eq_i16_e64 +v_cmpx_lg_f16 v255, v2 +// GFX12: v_cmpx_lg_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x85,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_eq_u16_e64 +v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ge_f16_e64 +v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ge_i16_e64 +v_cmpx_lt_f16 v1, v255 +// GFX12: v_cmpx_lt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ge_u16_e64 +v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_gt_f16_e64 +v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_gt_i16_e64 +v_cmpx_lt_f16 v255, v2 +// GFX12: v_cmpx_lt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_gt_u16_e64 +v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_f16 v255, v2 
quad_perm:[3,2,1,0] -// GFX12: v_cmpx_le_f16_e64 +v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_le_i16_e64 +v_cmpx_lt_i16 v1, v255 +// GFX12: v_cmpx_lt_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_le_u16_e64 +v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lg_f16_e64 +v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64 +v_cmpx_lt_i16 v255, v2 +// GFX12: v_cmpx_lt_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_i16_e64 +v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lt_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_lt_u16_e64 +v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ne_i16_e64 +v_cmpx_lt_u16 v1, v255 +// GFX12: v_cmpx_lt_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ne_u16_e64 +v_cmpx_lt_u16 v1, v255 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_neq_f16_e64 +v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_nge_f16_e64 +v_cmpx_lt_u16 v255, v2 +// GFX12: v_cmpx_lt_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_ngt_f16_e64 +v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_lt_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_nle_f16_e64 +v_cmpx_lt_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_lt_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_nlg_f16_e64 +v_cmpx_ne_i16 v1, v255 +// GFX12: v_cmpx_ne_i16_e64 v1, v255 ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_nlt_f16_e64 +v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_o_f16_e64 +v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_cmpx_u_f16_e64 +v_cmpx_ne_i16 v255, 
v2 +// GFX12: v_cmpx_ne_i16_e64 v255, v2 ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_class_f16_e64 +v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ne_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_eq_f16_e64 +v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_eq_i16_e64 +v_cmpx_ne_u16 v1, v255 +// GFX12: v_cmpx_ne_u16_e64 v1, v255 ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_eq_u16_e64 +v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ge_f16_e64 +v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ge_i16_e64 +v_cmpx_ne_u16 v255, v2 +// GFX12: v_cmpx_ne_u16_e64 v255, v2 ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ge_u16_e64 +v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ne_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_gt_f16_e64 +v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ne_u16_e64_dpp v255, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_gt_i16_e64 +v_cmpx_neq_f16 v1, v255 +// GFX12: v_cmpx_neq_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_gt_u16_e64 +v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_le_f16_e64 +v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_le_i16_e64 +v_cmpx_neq_f16 v255, v2 +// GFX12: v_cmpx_neq_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8d,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_le_u16_e64 +v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lg_f16_e64 +v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_neq_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64 +v_cmpx_nge_f16 v1, v255 +// GFX12: v_cmpx_nge_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_i16_e64 +v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x7e,0x00,0x89,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_u16_e64 +v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ne_i16_e64 +v_cmpx_nge_f16 v255, v2 +// GFX12: v_cmpx_nge_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x89,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ne_u16_e64 +v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_neq_f16_e64 +v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nge_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_nge_f16_e64 +v_cmpx_ngt_f16 v1, v255 +// GFX12: v_cmpx_ngt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xff,0x03,0x00] v_cmpx_ngt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ngt_f16_e64 +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_nle_f16_e64 +v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_nlg_f16_e64 +v_cmpx_ngt_f16 v255, v2 +// GFX12: v_cmpx_ngt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8b,0xd4,0xff,0x05,0x02,0x00] 
-v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_nlt_f16_e64 +v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_ngt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_o_f16_e64 +v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_ngt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_u_f16_e64 +v_cmpx_nle_f16 v1, v255 +// GFX12: v_cmpx_nle_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_class_f16_e64 +v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_eq_f16_e64 +v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_eq_i16_e64 +v_cmpx_nle_f16 v255, v2 +// GFX12: v_cmpx_nle_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8c,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_eq_u16_e64 +v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ge_f16_e64 +v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nle_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ge_i16_e64 +v_cmpx_nlg_f16 v1, v255 +// GFX12: v_cmpx_nlg_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ge_u16_e64 +v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_gt_f16_e64 +v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_gt_i16_e64 +v_cmpx_nlg_f16 v255, v2 +// GFX12: v_cmpx_nlg_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8a,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_gt_u16_e64 +v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_le_f16_e64 +v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_le_i16_e64 +v_cmpx_nlt_f16 v1, v255 +// GFX12: v_cmpx_nlt_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_le_u16_e64 +v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x00,0x8e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lg_f16_e64 +v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_f16_e64 +v_cmpx_nlt_f16 v255, v2 +// GFX12: v_cmpx_nlt_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x8e,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_i16_e64 +v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_lt_u16_e64 +v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_nlt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ne_i16_e64 +v_cmpx_o_f16 v1, v255 +// GFX12: v_cmpx_o_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ne_u16_e64 +v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_neq_f16_e64 +v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_nge_f16_e64 +v_cmpx_o_f16 v255, v2 +// GFX12: v_cmpx_o_f16_e64 v255, v2 ; encoding: 
[0x7e,0x00,0x87,0xd4,0xff,0x05,0x02,0x00] -v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_ngt_f16_e64 +v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_nle_f16_e64 +v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_o_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_nlg_f16_e64 +v_cmpx_u_f16 v1, v255 +// GFX12: v_cmpx_u_f16_e64 v1, v255 ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xff,0x03,0x00] -v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_nlt_f16_e64 +v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_cmpx_u_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_o_f16_e64 +v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_u_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f16 v255, v2 +// GFX12: v_cmpx_u_f16_e64 v255, v2 ; encoding: [0x7e,0x00,0x88,0xd4,0xff,0x05,0x02,0x00] v_cmpx_u_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_cmpx_u_f16_e64 +// GFX12: v_cmpx_u_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_cmpx_u_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt index 
29e39c9e60ec874..b87c7bb9b268f96 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 # W32: v_cmp_class_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0xfa,0x7c] # W64: v_cmp_class_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0xfa,0x7c] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt index 460b222d0b7d9a3..40735cef0c5360b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 
-mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64 # W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] # W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt index 18f9db15c980f8d..00d5106cc90a5a5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 # W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] # W64: 
v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt index 863d747e9c0ba3c..c6019b7fdfa75d6 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 # GFX11: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] 0x01,0x05,0xfa,0x7d diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt index e88d666fb3f935c..d3f92d0358188b6 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: 
llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 # GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] 0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt index 6c51c9ba5a24f16..3c5b243e497f1ea 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11 # GFX11: 
v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] 0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt index 320e85238e36e47..8fff403b502ac10 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 # W32: v_cmp_class_f16_e32 vcc_lo, v1, v2 ; encoding: [0x01,0x05,0xfa,0x7c] # W64: v_cmp_class_f16_e32 vcc, v1, v2 ; encoding: [0x01,0x05,0xfa,0x7c] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt index be40d5fc8af9ea3..a840f0a9c2bec55 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 
-disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 # W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] # W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt index 3fb7bef6c18aeb0..0300ff215c35242 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32 +# RUN: llvm-mc 
-triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64 # W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] # W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt index 5a3b1f183ebfa7b..74213ba162ae720 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 # GFX12: v_cmpx_class_f16_e32 v1, v2 ; encoding: [0x01,0x05,0xfa,0x7d] 0x01,0x05,0xfa,0x7d diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt index 704a17f8b091fc6..fe9ef4f9e90d0ba 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn 
-mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 # GFX12: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff] 0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt index 0b030b9e316ea46..53f15e8ae43147f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt @@ -1,5 +1,7 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 
-mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12 # GFX12: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05] 0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05 From 670512b5c32217e37796fd8d42101ac24cdb4a8d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 28 Oct 2024 14:37:44 +0000 Subject: [PATCH 183/425] [AArch64] Regenerate srem-lkk.ll to add missing asm comments Reduces diff in #112588 --- llvm/test/CodeGen/AArch64/srem-lkk.ll | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/srem-lkk.ll b/llvm/test/CodeGen/AArch64/srem-lkk.ll index 5ff178937ebbfb4..d9f91449dffb808 100644 --- a/llvm/test/CodeGen/AArch64/srem-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-lkk.ll @@ -4,14 +4,14 @@ define i32 @fold_srem_positive_odd(i32 %x) { ; CHECK-LABEL: fold_srem_positive_odd: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: mov w8, #37253 // =0x9185 ; CHECK-NEXT: movk w8, #44150, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 ; CHECK-NEXT: lsr x8, x8, #32 ; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: asr w9, w8, #6 ; CHECK-NEXT: add w8, w9, w8, lsr #31 -; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: mov w9, #95 // =0x5f ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, 95 @@ -22,13 +22,13 @@ define i32 @fold_srem_positive_odd(i32 %x) { define i32 @fold_srem_positive_even(i32 %x) { ; CHECK-LABEL: fold_srem_positive_even: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #36849 +; CHECK-NEXT: mov w8, #36849 // =0x8ff1 ; CHECK-NEXT: movk w8, #15827, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 ; CHECK-NEXT: lsr x9, x8, #63 ; CHECK-NEXT: asr x8, x8, #40 ; CHECK-NEXT: add w8, w8, w9 -; CHECK-NEXT: mov w9, #1060 +; CHECK-NEXT: mov w9, #1060 // =0x424 ; CHECK-NEXT: 
msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, 1060 @@ -39,13 +39,13 @@ define i32 @fold_srem_positive_even(i32 %x) { define i32 @fold_srem_negative_odd(i32 %x) { ; CHECK-LABEL: fold_srem_negative_odd: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65445 +; CHECK-NEXT: mov w8, #65445 // =0xffa5 ; CHECK-NEXT: movk w8, #42330, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 ; CHECK-NEXT: lsr x9, x8, #63 ; CHECK-NEXT: asr x8, x8, #40 ; CHECK-NEXT: add w8, w8, w9 -; CHECK-NEXT: mov w9, #-723 +; CHECK-NEXT: mov w9, #-723 // =0xfffffd2d ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, -723 @@ -56,13 +56,13 @@ define i32 @fold_srem_negative_odd(i32 %x) { define i32 @fold_srem_negative_even(i32 %x) { ; CHECK-LABEL: fold_srem_negative_even: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #62439 +; CHECK-NEXT: mov w8, #62439 // =0xf3e7 ; CHECK-NEXT: movk w8, #64805, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 ; CHECK-NEXT: lsr x9, x8, #63 ; CHECK-NEXT: asr x8, x8, #40 ; CHECK-NEXT: add w8, w8, w9 -; CHECK-NEXT: mov w9, #-22981 +; CHECK-NEXT: mov w9, #-22981 // =0xffffa63b ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret %1 = srem i32 %x, -22981 @@ -74,14 +74,14 @@ define i32 @fold_srem_negative_even(i32 %x) { define i32 @combine_srem_sdiv(i32 %x) { ; CHECK-LABEL: combine_srem_sdiv: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: mov w8, #37253 // =0x9185 ; CHECK-NEXT: movk w8, #44150, lsl #16 ; CHECK-NEXT: smull x8, w0, w8 ; CHECK-NEXT: lsr x8, x8, #32 ; CHECK-NEXT: add w8, w8, w0 ; CHECK-NEXT: asr w9, w8, #6 ; CHECK-NEXT: add w8, w9, w8, lsr #31 -; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: mov w9, #95 // =0x5f ; CHECK-NEXT: msub w9, w8, w9, w0 ; CHECK-NEXT: add w0, w9, w8 ; CHECK-NEXT: ret @@ -95,14 +95,14 @@ define i32 @combine_srem_sdiv(i32 %x) { define i64 @dont_fold_srem_i64(i64 %x) { ; CHECK-LABEL: dont_fold_srem_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #58849 +; CHECK-NEXT: mov x8, #58849 // =0xe5e1 ; CHECK-NEXT: movk x8, #48148, lsl #16 ; 
CHECK-NEXT: movk x8, #33436, lsl #32 ; CHECK-NEXT: movk x8, #21399, lsl #48 ; CHECK-NEXT: smulh x8, x0, x8 ; CHECK-NEXT: asr x9, x8, #5 ; CHECK-NEXT: add x8, x9, x8, lsr #63 -; CHECK-NEXT: mov w9, #98 +; CHECK-NEXT: mov w9, #98 // =0x62 ; CHECK-NEXT: msub x0, x8, x9, x0 ; CHECK-NEXT: ret %1 = srem i64 %x, 98 From 5ac3f3c45cd4d2934d36565eae7e33a629ff7b22 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Oct 2024 09:06:12 -0700 Subject: [PATCH 184/425] [RISCV] Add DestEEW = EEW1 to VMADC. (#113013) It was present on VMSBC but not VMADC. Reorder the instructions to avoid duplicate 'let' statements. --- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 6f7d14d5503bd3b..29759132c47d7e6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -1142,13 +1142,11 @@ defm VSEXT_VF2 : VALU_MV_VS2<"vsext.vf2", 0b010010, 0b00111>; // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions defm VADC_V : VALUm_IV_V_X_I<"vadc", 0b010000>; -let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in { -defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>; -defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>; -} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint defm VSBC_V : VALUm_IV_V_X<"vsbc", 0b010010>; let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, DestEEW = EEW1 in { +defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>; +defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>; defm VMSBC_V : VALUm_IV_V_X<"vmsbc", 0b010011>; defm VMSBC_V : VALUNoVm_IV_V_X<"vmsbc", 0b010011>; } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, DestEEW = EEW1 From eb53d08bce52d4503cf38e26bedfcbd0fed9cd3a Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 28 Oct 2024 09:07:14 -0700 Subject: [PATCH 185/425] [llvm-exegesis] 
Add Pfm Counters for SapphireRapids (#113847) This patch adds the appropriate hookups in X86PfmCounters.td for SapphireRapids. This is mostly to fix errors when some of my jobs that only really need dummy counters get scheduled on sapphire rapids machines, but figured I might as well do it properly while here. I do not have hardware access to test this currently, but this matches exactly with what is in the libpfm source code. --- llvm/lib/Target/X86/X86PfmCounters.td | 16 ++++++++++++++++ llvm/lib/Target/X86/X86SchedSapphireRapids.td | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td index c30e989cdc2af19..38d8d19091e0fdc 100644 --- a/llvm/lib/Target/X86/X86PfmCounters.td +++ b/llvm/lib/Target/X86/X86PfmCounters.td @@ -220,6 +220,22 @@ def AlderLakePfmCounters : ProcPfmCounters { } def : PfmCountersBinding<"alderlake", AlderLakePfmCounters>; +def SapphireRapidsPfmCounters : ProcPfmCounters { + let CycleCounter = UnhaltedCoreCyclesPfmCounter; + let UopsCounter = UopsIssuedPfmCounter; + let IssueCounters = [ + PfmIssueCounter<"SPRPort00", "uops_dispatched_port:port_0">, + PfmIssueCounter<"SPRPort01", "uops_dispatched_port:port_1">, + PfmIssueCounter<"SPRPort02_03_10", "uops_dispatched_port:port_2_3_10">, + PfmIssueCounter<"SPRPort04_09", "uops_dispatched_port:port_4_9">, + PfmIssueCounter<"SPRPort05_11", "uops_dispatched_port:port_5_11">, + PfmIssueCounter<"SPRPort06", "uops_dispatched_port:port_6">, + PfmIssueCounter<"SPRPort07_08", "uops_dispatched_port:port_7_8">, + ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; +} +def : PfmCountersBinding<"sapphirerapids", SapphireRapidsPfmCounters>; + // AMD X86 Counters. 
defvar DefaultAMDPfmValidationCounters = [ PfmValidationCounter, diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 6e292da4e293dbd..b0ebe70c31fd449 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -59,6 +59,8 @@ def SPRPort01_05 : ProcResGroup<[SPRPort01, SPRPort05]>; def SPRPort01_05_10 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort10]>; def SPRPort02_03 : ProcResGroup<[SPRPort02, SPRPort03]>; def SPRPort02_03_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort11]>; +def SPRPort02_03_10 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort10]>; +def SPRPort05_11 : ProcResGroup<[SPRPort05, SPRPort11]>; def SPRPort07_08 : ProcResGroup<[SPRPort07, SPRPort08]>; // EU has 112 reservation stations. @@ -78,6 +80,10 @@ def SPRPort02_03_07_08_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07, let BufferSize = 72; } +def SPRPortAny : ProcResGroup<[SPRPort00, SPRPort01, SPRPort02, SPRPort03, + SPRPort04, SPRPort05, SPRPort06, SPRPort07, + SPRPort08, SPRPort09, SPRPort10, SPRPort11]>; + // Integer loads are 5 cycles, so ReadAfterLd registers needn't be available // until 5 cycles after the memory operand. 
def : ReadAdvance; From 80f38fbdcfa96316908bb4ac3481d9eac6abf60e Mon Sep 17 00:00:00 2001 From: Boaz Brickner Date: Mon, 28 Oct 2024 17:10:07 +0100 Subject: [PATCH 186/425] [clang] [NFC] Deduplicate the logic between StringMapEntry.first() and StringMapEntry.getKey() (#113735) --- llvm/include/llvm/ADT/StringMapEntry.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h index 98b51cc1aebd59e..d93af5aedc39d70 100644 --- a/llvm/include/llvm/ADT/StringMapEntry.h +++ b/llvm/include/llvm/ADT/StringMapEntry.h @@ -116,9 +116,7 @@ class StringMapEntry final : public StringMapEntryStorage { return reinterpret_cast(this + 1); } - StringRef first() const { - return StringRef(getKeyData(), this->getKeyLength()); - } + StringRef first() const { return getKey(); } /// Create a StringMapEntry for the specified key construct the value using /// \p InitiVals. From 7a710110fcb2ad5d903ec41ba6a63193cf03edc0 Mon Sep 17 00:00:00 2001 From: Petr Kurapov Date: Mon, 28 Oct 2024 17:12:12 +0100 Subject: [PATCH 187/425] =?UTF-8?q?[MLIR][Vector]=20Remove=20unused=20and?= =?UTF-8?q?=20unimplemented=20Vector=5FWarpExecuteOnLa=E2=80=A6=20(#112338?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ne0Op builder Removing the declaration instead of implementing the builder as discussed in #110106 --- mlir/include/mlir/Dialect/Vector/IR/VectorOps.td | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index e859270cf9a5e5c..474f4ccf4891de9 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -3090,7 +3090,6 @@ def Vector_WarpExecuteOnLane0Op : Vector_Op<"warp_execute_on_lane_0", let skipDefaultBuilders = 1; let builders = [ - OpBuilder<(ins "Value":$laneid, "int64_t":$warpSize)>, 
OpBuilder<(ins "TypeRange":$resultTypes, "Value":$laneid, "int64_t":$warpSize)>, // `blockArgTypes` are different than `args` types as they are they From 106259510f6a7a3824dd34f78a77ead150dd2154 Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Mon, 28 Oct 2024 16:36:02 +0000 Subject: [PATCH 188/425] =?UTF-8?q?[AArch64]Add=20convert=20and=20multiply?= =?UTF-8?q?-add=20SIMD&FP=20assembly/disassembly=20in=E2=80=A6=20(#113296)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …structions This patch adds the following instructions: Conversion between floating-point and integer: FCVT{AS, AU, MS, MU, NS, NU, PS, PU, ZS, ZU} {S,U}CVTF Advanced SIMD three-register extension: FMMLA According to https://developer.arm.com/documentation/ddi0602 Co-authored-by: Marian Lukac marian.lukac@arm.com Co-authored-by: Spencer Abson spencer.abson@arm.com --- .../lib/Target/AArch64/AArch64InstrFormats.td | 109 ++++++-- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 25 +- llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s | 46 ++++ llvm/test/MC/AArch64/FP8/fmmla.s | 25 ++ llvm/test/MC/AArch64/armv9.6a-cvtf.s | 61 +++++ llvm/test/MC/AArch64/armv9.6a-fcvt.s | 253 ++++++++++++++++++ .../test/MC/AArch64/directive-arch-negative.s | 18 ++ llvm/test/MC/AArch64/directive-arch.s | 12 + .../directive-arch_extension-negative.s | 18 ++ .../MC/AArch64/directive-arch_extension.s | 12 + llvm/test/MC/AArch64/directive-cpu.s | 12 + llvm/test/MC/AArch64/neon-diagnostics.s | 108 +++++--- 12 files changed, 637 insertions(+), 62 deletions(-) create mode 100644 llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s create mode 100644 llvm/test/MC/AArch64/FP8/fmmla.s create mode 100644 llvm/test/MC/AArch64/armv9.6a-cvtf.s create mode 100644 llvm/test/MC/AArch64/armv9.6a-fcvt.s diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 9dd417314fbb865..837d737b28588c4 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td 
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -5234,6 +5234,32 @@ multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, } } +multiclass FPToIntegerSIMDScalar rmode, bits<3> opcode, string asm> { + // double-precision to 32-bit SIMD/FPR + def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm, + []> { + let Inst{31} = 0; // 32-bit FPR flag + } + + // half-precision to 32-bit SIMD/FPR + def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm, + []> { + let Inst{31} = 0; // 32-bit FPR flag + } + + // half-precision to 64-bit SIMD/FPR + def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm, + []> { + let Inst{31} = 1; // 64-bit FPR flag + } + + // single-precision to 64-bit SIMD/FPR + def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm, + []> { + let Inst{31} = 1; // 64-bit FPR flag + } +} + multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { // Scaled half-precision to 32-bit @@ -5295,7 +5321,7 @@ multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, //--- let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in -class BaseIntegerToFP rmode, bits<3> opcode, RegisterClass srcType, RegisterClass dstType, Operand immType, string asm, list pattern> : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), @@ -5305,15 +5331,16 @@ class BaseIntegerToFP Rn; bits<6> scale; let Inst{30-24} = 0b0011110; - let Inst{21-17} = 0b00001; - let Inst{16} = isUnsigned; + let Inst{21} = 0b0; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; let Inst{15-10} = scale; let Inst{9-5} = Rn; let Inst{4-0} = Rd; } let mayRaiseFPException = 1, Uses = [FPCR] in -class BaseIntegerToFPUnscaled rmode, bits<3> opcode, RegisterClass srcType, RegisterClass dstType, ValueType dvt, string asm, SDPatternOperator node> : I<(outs dstType:$Rd), (ins srcType:$Rn), @@ -5323,49 +5350,50 @@ class BaseIntegerToFPUnscaled Rn; 
bits<6> scale; let Inst{30-24} = 0b0011110; - let Inst{21-17} = 0b10001; - let Inst{16} = isUnsigned; + let Inst{21} = 0b1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; let Inst{15-10} = 0b000000; let Inst{9-5} = Rn; let Inst{4-0} = Rd; } -multiclass IntegerToFP { +multiclass IntegerToFP rmode, bits<3> opcode, string asm, SDPatternOperator node> { // Unscaled - def UWHri: BaseIntegerToFPUnscaled { + def UWHri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag let Inst{23-22} = 0b11; // 16-bit FPR flag let Predicates = [HasFullFP16]; } - def UWSri: BaseIntegerToFPUnscaled { + def UWSri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag let Inst{23-22} = 0b00; // 32-bit FPR flag } - def UWDri: BaseIntegerToFPUnscaled { + def UWDri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag let Inst{23-22} = 0b01; // 64-bit FPR flag } - def UXHri: BaseIntegerToFPUnscaled { + def UXHri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag let Inst{23-22} = 0b11; // 16-bit FPR flag let Predicates = [HasFullFP16]; } - def UXSri: BaseIntegerToFPUnscaled { + def UXSri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag let Inst{23-22} = 0b00; // 32-bit FPR flag } - def UXDri: BaseIntegerToFPUnscaled { + def UXDri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag let Inst{23-22} = 0b01; // 64-bit FPR flag } // Scaled - def SWHri: BaseIntegerToFP { @@ -5375,7 +5403,7 @@ multiclass IntegerToFP { let Predicates = [HasFullFP16]; } - def SWSri: BaseIntegerToFP { @@ -5384,7 +5412,7 @@ multiclass IntegerToFP { let scale{5} = 1; } - def SWDri: BaseIntegerToFP { @@ -5393,7 +5421,7 @@ multiclass IntegerToFP { let scale{5} = 1; } - def SXHri: BaseIntegerToFP { @@ -5402,7 +5430,7 @@ multiclass IntegerToFP { let Predicates = [HasFullFP16]; } - def SXSri: BaseIntegerToFP { @@ -5410,7 +5438,7 @@ multiclass IntegerToFP { let Inst{23-22} = 0b00; // 32-bit FPR flag } - def SXDri: BaseIntegerToFP { @@ -5419,6 +5447,32 
@@ multiclass IntegerToFP { } } +multiclass IntegerToFPSIMDScalar rmode, bits<3> opcode, string asm, SDPatternOperator node = null_frag> { + // 32-bit to half-precision + def HSr: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + } + + // 32-bit to double-precision + def DSr: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + // 64-bit to half-precision + def HDr: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + } + + // 64-bit to single-precision + def SDr: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag + } +} + //--- // Unscaled integer <-> floating point conversion (i.e. FMOV) //--- @@ -13126,3 +13180,20 @@ multiclass AtomicFPStore op0, string asm> { def S : BaseAtomicFPStore; def H : BaseAtomicFPStore; } + +class BaseSIMDThreeSameVectorFP8MatrixMul size, string kind> + : BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101, + V128, asm, ".16b", []> { + let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn", ".16b", + ", $Rm", ".16b", "}"); +} + +multiclass SIMDThreeSameVectorFP8MatrixMul{ + def v8f16: BaseSIMDThreeSameVectorFP8MatrixMul{ + let Predicates = [HasNEON, HasF8F16MM]; + } + def v4f32: BaseSIMDThreeSameVectorFP8MatrixMul{ + let Predicates = [HasNEON, HasF8F32MM]; + } +} + diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index fe3c8578b52aa49..6194de2d56b6304 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4838,6 +4838,19 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; +let Predicates = [HasNEON, HasFPRCVT] in{ + 
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">; + defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">; + defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">; + defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">; + defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">; + defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">; + defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">; + defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">; + defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">; + defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">; +} + // AArch64's FCVT instructions saturate when out of range. multiclass FPToIntegerSatPats { let Predicates = [HasFullFP16] in { @@ -4996,8 +5009,13 @@ def : Pat<(i64 (any_llround f64:$Rn)), // Scaled integer to floating point conversion instructions. //===----------------------------------------------------------------------===// -defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>; -defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>; +defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>; +defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>; + +let Predicates = [HasNEON, HasFPRCVT] in { + defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">; + defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">; +} def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)), (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>; @@ -10547,6 +10565,9 @@ let Predicates = [HasLSFE] in { def STBFMINNML : BaseAtomicFPStore; } +let Uses = [FPMR, FPCR] in +defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">; + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" include "AArch64SMEInstrInfo.td" diff --git a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s new file mode 100644 index 000000000000000..cf8d216581240ad --- /dev/null +++ 
b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s @@ -0,0 +1,46 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm 2>&1 < %s| FileCheck %s + +fmmla v0.4h, v1.16b, v2.16b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmmla v0.4h, v1.16b, v2.16b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla v0.8s, v1.16b, v2.16b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier +// CHECK-NEXT: fmmla v0.8s, v1.16b, v2.16b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla v0.4s, v1.4s, v2.4s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmmla v0.4s, v1.4s, v2.4s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla v0.8h, v1.8h, v2.8h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla v0.16b, v1.16b, v2.16b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmmla v0.16b, v1.16b, v2.16b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla v0.d, v1.16b, v2.16b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmmla v0.d, v1.16b, v2.16b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla v0.2d, v1.16b, v2.16b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmmla v0.2d, v1.16b, v2.16b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla v0.8h, v1.8b, v2.8b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmmla v0.8h, v1.8b, v2.8b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla v0.4s, v1.8b, v2.8b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmmla v0.4s, v1.8b, v2.8b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/FP8/fmmla.s b/llvm/test/MC/AArch64/FP8/fmmla.s new file mode 100644 index 000000000000000..922f4c9d918ce98 --- /dev/null +++ 
b/llvm/test/MC/AArch64/FP8/fmmla.s @@ -0,0 +1,25 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm < %s \ +// RUN: | llvm-objdump -d --mattr=+f8f16mm,+f8f32mm - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=-f8f16mm,-f8f32mm - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +fmmla v0.8h, v1.16b, v2.16b +// CHECK-INST: fmmla v0.8h, v1.16b, v2.16b +// CHECK-ENCODING: [0x20,0xec,0x02,0x6e] +// CHECK-ERROR: instruction requires: f8f16mm +// CHECK-UNKNOWN: 6e02ec20 + +fmmla v0.4s, v1.16b, v2.16b +// CHECK-INST: fmmla v0.4s, v1.16b, v2.16b +// CHECK-ENCODING: [0x20,0xec,0x82,0x6e] +// CHECK-ERROR: instruction requires: f8f32mm +// CHECK-UNKNOWN: 6e82ec20 \ No newline at end of file diff --git a/llvm/test/MC/AArch64/armv9.6a-cvtf.s b/llvm/test/MC/AArch64/armv9.6a-cvtf.s new file mode 100644 index 000000000000000..6858d3896af5aba --- /dev/null +++ b/llvm/test/MC/AArch64/armv9.6a-cvtf.s @@ -0,0 +1,61 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fprcvt < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj 
-mattr=+fprcvt < %s \ +// RUN: | llvm-objdump -d --mattr=+fprcvt - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fprcvt < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=-fprcvt - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fprcvt < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+fprcvt -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +scvtf d1, s2 +// CHECK-INST: scvtf d1, s2 +// CHECK-ENCODING: [0x41,0x00,0x7c,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e7c0041 + +scvtf h1, s2 +// CHECK-INST: scvtf h1, s2 +// CHECK-ENCODING: [0x41,0x00,0xfc,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1efc0041 + +scvtf h2, d0 +// CHECK-INST: scvtf h2, d0 +// CHECK-ENCODING: [0x02,0x00,0xfc,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9efc0002 + +scvtf s3, d4 +// CHECK-INST: scvtf s3, d4 +// CHECK-ENCODING: [0x83,0x00,0x3c,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3c0083 + +ucvtf d1, s2 +// CHECK-INST: ucvtf d1, s2 +// CHECK-ENCODING: [0x41,0x00,0x7d,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e7d0041 + +ucvtf h1, s2 +// CHECK-INST: ucvtf h1, s2 +// CHECK-ENCODING: [0x41,0x00,0xfd,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1efd0041 + +ucvtf h2, d0 +// CHECK-INST: ucvtf h2, d0 +// CHECK-ENCODING: [0x02,0x00,0xfd,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9efd0002 + +ucvtf s3, d4 +// CHECK-INST: ucvtf s3, d4 +// CHECK-ENCODING: [0x83,0x00,0x3d,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3d0083 \ No newline at end of file diff --git a/llvm/test/MC/AArch64/armv9.6a-fcvt.s 
b/llvm/test/MC/AArch64/armv9.6a-fcvt.s new file mode 100644 index 000000000000000..b14ec93563f5c4d --- /dev/null +++ b/llvm/test/MC/AArch64/armv9.6a-fcvt.s @@ -0,0 +1,253 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fprcvt < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fprcvt < %s \ +// RUN: | llvm-objdump -d --mattr=+fprcvt - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fprcvt < %s \ +// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=-fprcvt - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fprcvt < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+fprcvt -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +fcvtas s0, d1 +// CHECK-INST: fcvtas s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x7a,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e7a0020 + +fcvtas s1, h2 +// CHECK-INST: fcvtas s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xfa,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1efa0041 + +fcvtas d3, h4 +// CHECK-INST: fcvtas d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xfa,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9efa0083 + +fcvtas d0, s5 +// CHECK-INST: fcvtas d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x3a,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3a00a0 + +fcvtau s0, d1 +// CHECK-INST: fcvtau s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x7b,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e7b0020 + +fcvtau s1, h2 +// CHECK-INST: fcvtau s1, h2 +// CHECK-ENCODING: 
[0x41,0x00,0xfb,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1efb0041 + +fcvtau d3, h4 +// CHECK-INST: fcvtau d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xfb,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9efb0083 + +fcvtau d0, s5 +// CHECK-INST: fcvtau d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x3b,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3b00a0 + +fcvtms s0, d1 +// CHECK-INST: fcvtms s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x74,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e740020 + +fcvtms s1, h2 +// CHECK-INST: fcvtms s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xf4,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1ef40041 + +fcvtms d3, h4 +// CHECK-INST: fcvtms d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xf4,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9ef40083 + +fcvtms d0, s5 +// CHECK-INST: fcvtms d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x34,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3400a0 + +fcvtmu s0, d1 +// CHECK-INST: fcvtmu s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x75,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e750020 + +fcvtmu s1, h2 +// CHECK-INST: fcvtmu s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xf5,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1ef50041 + +fcvtmu d3, h4 +// CHECK-INST: fcvtmu d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xf5,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9ef50083 + +fcvtmu d0, s5 +// CHECK-INST: fcvtmu d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x35,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3500a0 + +fcvtns s0, d1 +// CHECK-INST: fcvtns s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x6a,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e6a0020 + +fcvtns s1, h2 +// CHECK-INST: fcvtns s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xea,0x1e] +// CHECK-ERROR: 
instruction requires: fprcvt +// CHECK-UNKNOWN: 1eea0041 + +fcvtns d3, h4 +// CHECK-INST: fcvtns d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xea,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9eea0083 + +fcvtns d0, s5 +// CHECK-INST: fcvtns d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x2a,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e2a00a0 + +fcvtnu s0, d1 +// CHECK-INST: fcvtnu s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x6b,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e6b0020 + +fcvtnu s1, h2 +// CHECK-INST: fcvtnu s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xeb,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1eeb0041 + +fcvtnu d3, h4 +// CHECK-INST: fcvtnu d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xeb,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9eeb0083 + +fcvtnu d0, s5 +// CHECK-INST: fcvtnu d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x2b,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e2b00a0 + +fcvtps s0, d1 +// CHECK-INST: fcvtps s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x72,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e720020 + +fcvtps s1, h2 +// CHECK-INST: fcvtps s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xf2,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1ef20041 + +fcvtps d3, h4 +// CHECK-INST: fcvtps d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xf2,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9ef20083 + +fcvtps d0, s5 +// CHECK-INST: fcvtps d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x32,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3200a0 + +fcvtpu s0, d1 +// CHECK-INST: fcvtpu s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x73,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e730020 + +fcvtpu s1, h2 +// CHECK-INST: fcvtpu s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xf3,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// 
CHECK-UNKNOWN: 1ef30041 + +fcvtpu d3, h4 +// CHECK-INST: fcvtpu d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xf3,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9ef30083 + +fcvtpu d0, s5 +// CHECK-INST: fcvtpu d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x33,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3300a0 + +fcvtzs s0, d1 +// CHECK-INST: fcvtzs s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x76,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e760020 + +fcvtzs s1, h2 +// CHECK-INST: fcvtzs s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xf6,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1ef60041 + +fcvtzs d3, h4 +// CHECK-INST: fcvtzs d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xf6,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9ef60083 + +fcvtzs d0, s5 +// CHECK-INST: fcvtzs d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x36,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3600a0 + +fcvtzu s0, d1 +// CHECK-INST: fcvtzu s0, d1 +// CHECK-ENCODING: [0x20,0x00,0x77,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1e770020 + +fcvtzu s1, h2 +// CHECK-INST: fcvtzu s1, h2 +// CHECK-ENCODING: [0x41,0x00,0xf7,0x1e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 1ef70041 + +fcvtzu d3, h4 +// CHECK-INST: fcvtzu d3, h4 +// CHECK-ENCODING: [0x83,0x00,0xf7,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9ef70083 + +fcvtzu d0, s5 +// CHECK-INST: fcvtzu d0, s5 +// CHECK-ENCODING: [0xa0,0x00,0x37,0x9e] +// CHECK-ERROR: instruction requires: fprcvt +// CHECK-UNKNOWN: 9e3700a0 \ No newline at end of file diff --git a/llvm/test/MC/AArch64/directive-arch-negative.s b/llvm/test/MC/AArch64/directive-arch-negative.s index 19b48ea66bfe6ab..4c17c5609712036 100644 --- a/llvm/test/MC/AArch64/directive-arch-negative.s +++ b/llvm/test/MC/AArch64/directive-arch-negative.s @@ -61,3 +61,21 @@ cbhi x5, x5, #1020 # CHECK: error: 
instruction requires: cmpbr # CHECK-NEXT: cbhi x5, x5, #1020 + + .arch armv9.6.-a+nofprcvt + scvtf d1, s2 + +# CHECK: error: instruction requires: fprcvt +# CHECK-NEXT: scvtf d1, s2 + + .arch armv9.6.-a+nof8f16mm + fmmla v0.8h, v1.16b, v2.16b + +# CHECK: error: instruction requires: f8f16mm +# CHECK-NEXT: fmmla v0.8h, v1.16b, v2.16b + + .arch armv9.6.-a+nof8f32mm + fmmla v0.4s, v1.16b, v2.16b + +# CHECK: error: instruction requires: f8f32mm +# CHECK-NEXT: fmmla v0.4s, v1.16b, v2.16b diff --git a/llvm/test/MC/AArch64/directive-arch.s b/llvm/test/MC/AArch64/directive-arch.s index 8d9c0cef7536d36..ba605cc5d1a6984 100644 --- a/llvm/test/MC/AArch64/directive-arch.s +++ b/llvm/test/MC/AArch64/directive-arch.s @@ -26,3 +26,15 @@ cbne x5, #31, lbl # CHECK: cbne x5, #31, lbl + + .arch armv9-a+fprcvt + scvtf h1, s2 +# CHECK: scvtf h1, s2 + + .arch armv9-a+f8f16mm + fmmla v0.8h, v1.16b, v2.16b +# CHECK: fmmla v0.8h, v1.16b, v2.16b + + .arch armv9-a+f8f32mm + fmmla v0.4s, v1.16b, v2.16b +# CHECK: fmmla v0.4s, v1.16b, v2.16b diff --git a/llvm/test/MC/AArch64/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/directive-arch_extension-negative.s index 363989d7b9b260e..63da153c1a6ffca 100644 --- a/llvm/test/MC/AArch64/directive-arch_extension-negative.s +++ b/llvm/test/MC/AArch64/directive-arch_extension-negative.s @@ -237,3 +237,21 @@ cbhi x5, x5, #1020 // CHECK: [[@LINE-1]]:1: error: instruction requires: cmpbr // CHECK-NEXT: cbhi x5, x5, #1020 + +.arch_extension fprcvt +.arch_extension nofprcvt +fcvtmu s0, d1 +// CHECK: [[@LINE-1]]:1: error: instruction requires: fprcvt +// CHECK-NEXT: fcvtmu s0, d1 + +.arch_extension f8f16mm +.arch_extension nof8f16mm +fmmla v2.8h, v1.16b, v0.16b +// CHECK: [[@LINE-1]]:1: error: instruction requires: f8f16mm +// CHECK-NEXT: fmmla v2.8h, v1.16b, v0.16b + +.arch_extension f8f32mm +.arch_extension nof8f32mm +fmmla v2.4s, v1.16b, v0.16b +// CHECK: [[@LINE-1]]:1: error: instruction requires: f8f32mm +// CHECK-NEXT: fmmla v2.4s, v1.16b, 
v0.16b diff --git a/llvm/test/MC/AArch64/directive-arch_extension.s b/llvm/test/MC/AArch64/directive-arch_extension.s index 8a0e1ac471cea78..b8e8696c7abbf10 100644 --- a/llvm/test/MC/AArch64/directive-arch_extension.s +++ b/llvm/test/MC/AArch64/directive-arch_extension.s @@ -189,3 +189,15 @@ msr SSBS, #1 .arch_extension tme tstart x0 // CHECK: tstart x0 + +.arch_extension fprcvt +fcvtns s0, d1 +// CHECK: fcvtns s0, d1 + +.arch_extension f8f16mm +fmmla v1.8h, v2.16b, v3.16b +// CHECK: fmmla v1.8h, v2.16b, v3.16b + +.arch_extension f8f32mm +fmmla v1.4s, v2.16b, v3.16b +// CHECK: fmmla v1.4s, v2.16b, v3.16b diff --git a/llvm/test/MC/AArch64/directive-cpu.s b/llvm/test/MC/AArch64/directive-cpu.s index e3d7b1cd75e5501..1a0a0bd0c5132ce 100644 --- a/llvm/test/MC/AArch64/directive-cpu.s +++ b/llvm/test/MC/AArch64/directive-cpu.s @@ -39,3 +39,15 @@ sha512h q0, q1, v2.2d .cpu generic+sm4 sm4e v2.4s, v15.4s // CHECK: sm4e v2.4s, v15.4s + +.cpu generic+fprcvt +scvtf d1, s2 +// CHECK: scvtf d1, s2 + +.cpu generic+f8f16mm +fmmla v0.8h, v1.16b, v2.16b +// CHECK: fmmla v0.8h, v1.16b, v2.16b + +.cpu generic+f8f32mm +fmmla v0.4s, v1.16b, v2.16b +// CHECK: fmmla v0.4s, v1.16b, v2.16b diff --git a/llvm/test/MC/AArch64/neon-diagnostics.s b/llvm/test/MC/AArch64/neon-diagnostics.s index 9a0445131ddf7c6..6863a89bbe189ed 100644 --- a/llvm/test/MC/AArch64/neon-diagnostics.s +++ b/llvm/test/MC/AArch64/neon-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t +// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fprcvt < %s 2> %t // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s //------------------------------------------------------------------------------ @@ -5176,6 +5176,32 @@ // CHECK-ERROR: ucvtf d21, s14, #64 // CHECK-ERROR: ^ +//---------------------------------------------------------------------- +// Scalar Signed Integer Convert To Floating-Point 
+//--------------------------------------------------------------------- + + scvtf d0, h0 + scvtf s0, h0 +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf d0, h0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: scvtf s0, h0 +// CHECK-ERROR: ^ + +//---------------------------------------------------------------------- +// Scalar Unsigned Integer Convert To Floating-Point +//--------------------------------------------------------------------- + + ucvtf d0, h0 + ucvtf s0, h0 +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf d0, h0 +// CHECK-ERROR: ^ +// CHECK-ERROR: error: invalid operand for instruction +// CHECK-ERROR: ucvtf s0, h0 +// CHECK-ERROR: ^ + //------------------------------------------------------------------------------ // Element reverse //------------------------------------------------------------------------------ @@ -6943,14 +6969,14 @@ // With Ties To Away //---------------------------------------------------------------------- - fcvtas s0, d0 - fcvtas d0, s0 + fcvtas h0, d0 + fcvtas h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtas s0, d0 +// CHECK-ERROR: fcvtas h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtas d0, s0 +// CHECK-ERROR: fcvtas h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- @@ -6958,14 +6984,14 @@ // Nearest With Ties To Away //---------------------------------------------------------------------- - fcvtau s0, d0 - fcvtau d0, s0 + fcvtau h0, d0 + fcvtau h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtau s0, d0 +// CHECK-ERROR: fcvtau h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtau d0, s0 +// CHECK-ERROR: fcvtau h0, s0 // CHECK-ERROR: ^ 
//---------------------------------------------------------------------- @@ -6973,14 +6999,14 @@ // Minus Infinity //---------------------------------------------------------------------- - fcvtms s0, d0 - fcvtms d0, s0 + fcvtms h0, d0 + fcvtms h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtms s0, d0 +// CHECK-ERROR: fcvtms h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtms d0, s0 +// CHECK-ERROR: fcvtms h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- @@ -6988,14 +7014,14 @@ // Minus Infinity //---------------------------------------------------------------------- - fcvtmu s0, d0 - fcvtmu d0, s0 + fcvtmu h0, d0 + fcvtmu h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtmu s0, d0 +// CHECK-ERROR: fcvtmu h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtmu d0, s0 +// CHECK-ERROR: fcvtmu h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- @@ -7003,14 +7029,14 @@ // With Ties To Even //---------------------------------------------------------------------- - fcvtns s0, d0 - fcvtns d0, s0 + fcvtns h0, d0 + fcvtns h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtns s0, d0 +// CHECK-ERROR: fcvtns h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtns d0, s0 +// CHECK-ERROR: fcvtns h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- @@ -7018,14 +7044,14 @@ // Nearest With Ties To Even //---------------------------------------------------------------------- - fcvtnu s0, d0 - fcvtnu d0, s0 + fcvtnu h0, d0 + fcvtnu h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtnu s0, d0 +// CHECK-ERROR: fcvtnu h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid 
operand for instruction -// CHECK-ERROR: fcvtnu d0, s0 +// CHECK-ERROR: fcvtnu h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- @@ -7033,14 +7059,14 @@ // Positive Infinity //---------------------------------------------------------------------- - fcvtps s0, d0 - fcvtps d0, s0 + fcvtps h0, d0 + fcvtps h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtps s0, d0 +// CHECK-ERROR: fcvtps h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtps d0, s0 +// CHECK-ERROR: fcvtps h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- @@ -7048,28 +7074,28 @@ // Positive Infinity //---------------------------------------------------------------------- - fcvtpu s0, d0 - fcvtpu d0, s0 + fcvtpu h0, d0 + fcvtpu h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtpu s0, d0 +// CHECK-ERROR: fcvtpu h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtpu d0, s0 +// CHECK-ERROR: fcvtpu h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- // Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero //---------------------------------------------------------------------- - fcvtzs s0, d0 - fcvtzs d0, s0 + fcvtzs h0, d0 + fcvtzs h0, s0 // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtzs s0, d0 +// CHECK-ERROR: fcvtzs h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtzs d0, s0 +// CHECK-ERROR: fcvtzs h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- @@ -7077,14 +7103,14 @@ // Zero //---------------------------------------------------------------------- - fcvtzu s0, d0 - fcvtzu d0, s0 + fcvtzu h0, d0 + fcvtzu h0, s0 // CHECK-ERROR: error: invalid operand for 
instruction -// CHECK-ERROR: fcvtzu s0, d0 +// CHECK-ERROR: fcvtzu h0, d0 // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction -// CHECK-ERROR: fcvtzu d0, s0 +// CHECK-ERROR: fcvtzu h0, s0 // CHECK-ERROR: ^ //---------------------------------------------------------------------- From ab5d3c9d359d84e454d54e8d91b5c834c42c5a47 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Mon, 28 Oct 2024 09:37:54 -0700 Subject: [PATCH 189/425] [RISCV] Assign different scheduling classes for VMADC/VMSBC (#113009) Split the scheduling classes of VMADC/VMSBC away from that of VADC/VSBC. Because the former are technically mask-producing instructions rather than normal vector arithmetics, which might have different performance characteristics on some processors. This is effectively NFC. --- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 18 +++++++++++------ .../Target/RISCV/RISCVInstrInfoVPseudos.td | 20 +++++++++---------- llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 3 +++ llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td | 3 +++ llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td | 3 +++ llvm/lib/Target/RISCV/RISCVScheduleV.td | 6 ++++++ 6 files changed, 37 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 29759132c47d7e6..4e8619c5ec2392e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -630,31 +630,37 @@ multiclass VMRG_IV_V_X_I funct6> { } multiclass VALUm_IV_V_X funct6> { + // if LSB of funct6 is 1, it's a mask-producing instruction that + // uses a different scheduling class. 
+ defvar WritePrefix = !if(funct6{0}, "WriteVICALUM", "WriteVICALU"); def VM : VALUmVV, - SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV">; + SchedBinaryMC; def XM : VALUmVX, - SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX">; + SchedBinaryMC; } multiclass VALUm_IV_V_X_I funct6> : VALUm_IV_V_X { + // if LSB of funct6 is 1, it's a mask-producing instruction that + // uses a different scheduling class. + defvar WriteSched = !if(funct6{0}, "WriteVICALUMI", "WriteVICALUI"); def IM : VALUmVI, - SchedUnaryMC<"WriteVICALUI", "ReadVICALUV">; + SchedUnaryMC; } multiclass VALUNoVm_IV_V_X funct6> { def V : VALUVVNoVm, - SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", + SchedBinaryMC<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", forceMasked=0>; def X : VALUVXNoVm, - SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", + SchedBinaryMC<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", forceMasked=0>; } multiclass VALUNoVm_IV_V_X_I funct6> : VALUNoVm_IV_V_X { def I : VALUVINoVm, - SchedUnaryMC<"WriteVICALUI", "ReadVICALUV", forceMasked=0>; + SchedUnaryMC<"WriteVICALUMI", "ReadVICALUV", forceMasked=0>; } multiclass VALU_FV_F funct6> { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 6b308bc8c9aa0fe..af4f653f57afd5a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -3072,13 +3072,13 @@ multiclass VPseudoVCALUM_VM_XM_IM { defvar mx = m.MX; defm "" : VPseudoBinaryV_VM, - SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1, + SchedBinary<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1, forcePassthruRead=true>; defm "" : VPseudoBinaryV_XM, - SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1, + SchedBinary<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1, forcePassthruRead=true>; defm "" : VPseudoBinaryV_IM, - 
SchedUnary<"WriteVICALUI", "ReadVICALUV", mx, forceMasked=1, + SchedUnary<"WriteVICALUMI", "ReadVICALUV", mx, forceMasked=1, forcePassthruRead=true>; } } @@ -3089,11 +3089,11 @@ multiclass VPseudoVCALUM_VM_XM { defvar mx = m.MX; defm "" : VPseudoBinaryV_VM, - SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1, + SchedBinary<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1, forcePassthruRead=true>; defm "" : VPseudoBinaryV_XM, - SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1, + SchedBinary<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1, forcePassthruRead=true>; } } @@ -3104,13 +3104,13 @@ multiclass VPseudoVCALUM_V_X_I { defvar mx = m.MX; defm "" : VPseudoBinaryV_VM, - SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, + SchedBinary<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", mx, forcePassthruRead=true>; defm "" : VPseudoBinaryV_XM, - SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, + SchedBinary<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", mx, forcePassthruRead=true>; defm "" : VPseudoBinaryV_IM, - SchedUnary<"WriteVICALUI", "ReadVICALUV", mx, + SchedUnary<"WriteVICALUMI", "ReadVICALUV", mx, forcePassthruRead=true>; } } @@ -3120,10 +3120,10 @@ multiclass VPseudoVCALUM_V_X { foreach m = MxList in { defvar mx = m.MX; defm "" : VPseudoBinaryV_VM, - SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, + SchedBinary<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", mx, forcePassthruRead=true>; defm "" : VPseudoBinaryV_XM, - SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, + SchedBinary<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", mx, forcePassthruRead=true>; } } diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 24cbe1531c017cb..d07ee393bbcfd09 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -631,6 +631,9 @@ foreach mx = 
SchedMxList in { defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td index 6926184e92399c2..7a54d2fe1080806 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td @@ -467,6 +467,9 @@ foreach mx = SchedMxList in { defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP400VEXQ0], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP400VEXQ0], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP400VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP400VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP400VEXQ0], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP400VEXQ0], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP400VEXQ0], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP400VEXQ0], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP400VEXQ0], mx, IsWorstCase>; diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td index 59972d781a315ae..c685a6d2b094bed 100644 --- 
a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td @@ -403,6 +403,9 @@ foreach mx = SchedMxList in { defm "" : LMULWriteResMX<"WriteVICALUV", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICALUX", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICALUI", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorArith], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICmpV", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICmpX", [SiFiveP600VectorArith], mx, IsWorstCase>; defm "" : LMULWriteResMX<"WriteVICmpI", [SiFiveP600VectorArith], mx, IsWorstCase>; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td index ee041ea142b94c0..6b9f1dd3218913f 100644 --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -364,6 +364,9 @@ defm "" : LMULSchedWrites<"WriteVExtV">; defm "" : LMULSchedWrites<"WriteVICALUV">; defm "" : LMULSchedWrites<"WriteVICALUX">; defm "" : LMULSchedWrites<"WriteVICALUI">; +defm "" : LMULSchedWrites<"WriteVICALUMV">; +defm "" : LMULSchedWrites<"WriteVICALUMX">; +defm "" : LMULSchedWrites<"WriteVICALUMI">; // 11.6. 
Vector Single-Width Bit Shift Instructions defm "" : LMULSchedWrites<"WriteVShiftV">; defm "" : LMULSchedWrites<"WriteVShiftX">; @@ -856,6 +859,9 @@ defm "" : LMULWriteRes<"WriteVExtV", []>; defm "" : LMULWriteRes<"WriteVICALUV", []>; defm "" : LMULWriteRes<"WriteVICALUX", []>; defm "" : LMULWriteRes<"WriteVICALUI", []>; +defm "" : LMULWriteRes<"WriteVICALUMV", []>; +defm "" : LMULWriteRes<"WriteVICALUMX", []>; +defm "" : LMULWriteRes<"WriteVICALUMI", []>; defm "" : LMULWriteRes<"WriteVShiftV", []>; defm "" : LMULWriteRes<"WriteVShiftX", []>; defm "" : LMULWriteRes<"WriteVShiftI", []>; From 92412c106f5275b4b385f7c2d882008181de2854 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 28 Oct 2024 09:40:15 -0700 Subject: [PATCH 190/425] [llvm-objdump] Handle -M for --macho --macho -d uses the `parseInputMachO` code path, which does not handle -M. Add -M handling for --macho as well. Close #61019 Pull Request: https://github.com/llvm/llvm-project/pull/113795 --- .../tools/llvm-objdump/MachO/AArch64/aliases.s | 15 +++++++++++++++ llvm/tools/llvm-objdump/MachODump.cpp | 4 ++++ llvm/tools/llvm-objdump/llvm-objdump.cpp | 2 +- llvm/tools/llvm-objdump/llvm-objdump.h | 1 + 4 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/llvm-objdump/MachO/AArch64/aliases.s diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/aliases.s b/llvm/test/tools/llvm-objdump/MachO/AArch64/aliases.s new file mode 100644 index 000000000000000..d8b71348a8e40fc --- /dev/null +++ b/llvm/test/tools/llvm-objdump/MachO/AArch64/aliases.s @@ -0,0 +1,15 @@ +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t +# RUN: llvm-objdump --macho -d -M no-aliases %t | FileCheck %s +# RUN: llvm-objdump --macho -d --disassembler-options=no-aliases %t | FileCheck %s + +# CHECK: orr w1, wzr, w2 + +# RUN: llvm-objdump --macho -d %t | FileCheck %s --check-prefix=ALIAS + +# ALIAS: mov w1, w2 + +# RUN: not llvm-objdump --macho -d -M unknown %t 2>&1 | FileCheck %s -DFILE=%t 
--check-prefix=ERR + +# ERR: error: '[[FILE]]': unrecognized disassembler option: unknown + +mov w1, w2 diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index b8afb560d2ae9c2..ab6f65cd41a3657 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -7330,6 +7330,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, // comment causing different diffs with the 'C' disassembler library API. // IP->setCommentStream(CommentStream); + for (StringRef Opt : DisassemblerOptions) + if (!IP->applyTargetSpecificCLOption(Opt)) + reportError(Filename, "unrecognized disassembler option: " + Opt); + // Set up separate thumb disassembler if needed. std::unique_ptr ThumbMRI; std::unique_ptr ThumbAsmInfo; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 8073c898b8a147d..86ba9193dff2d14 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -305,11 +305,11 @@ bool objdump::ArchiveHeaders; bool objdump::Demangle; bool objdump::Disassemble; bool objdump::DisassembleAll; +std::vector objdump::DisassemblerOptions; bool objdump::SymbolDescription; bool objdump::TracebackTable; static std::vector DisassembleSymbols; static bool DisassembleZeroes; -static std::vector DisassemblerOptions; static ColorOutput DisassemblyColor; DIDumpType objdump::DwarfDumpType; static bool DynamicRelocations; diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h index 7778cf6c2784eb7..debaedd33429d02 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/llvm/tools/llvm-objdump/llvm-objdump.h @@ -50,6 +50,7 @@ extern DebugVarsFormat DbgVariables; extern bool Demangle; extern bool Disassemble; extern bool DisassembleAll; +extern std::vector DisassemblerOptions; extern DIDumpType DwarfDumpType; extern std::vector FilterSections; extern bool LeadingAddr; From 
6ab26eab4f1e06f2da7b3183c55666ad57f8866e Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Mon, 28 Oct 2024 09:45:03 -0700 Subject: [PATCH 191/425] Check hasOptSize() in shouldOptimizeForSize() (#112626) --- llvm/lib/CodeGen/BranchFolding.cpp | 7 ++----- llvm/lib/CodeGen/CodeGenPrepare.cpp | 9 +++------ llvm/lib/CodeGen/ExpandMemCmp.cpp | 3 +-- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 4 +--- llvm/lib/CodeGen/LiveIntervals.cpp | 3 +-- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 5 +---- llvm/lib/CodeGen/MachineCombiner.cpp | 6 +----- llvm/lib/CodeGen/MachineSizeOpts.cpp | 8 +++++++- llvm/lib/CodeGen/SelectOptimize.cpp | 4 ++-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +-- llvm/lib/CodeGen/TailDuplicator.cpp | 4 +--- llvm/lib/CodeGen/TargetLoweringBase.cpp | 1 - llvm/lib/Target/X86/X86FixupBWInsts.cpp | 3 +-- llvm/lib/Target/X86/X86OptimizeLEAs.cpp | 4 +--- llvm/lib/Target/X86/X86PadShortFunction.cpp | 4 +--- .../lib/Transforms/IPO/FunctionSpecialization.cpp | 3 +-- llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 3 +-- .../lib/Transforms/Scalar/LoopLoadElimination.cpp | 7 ++----- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 15 +++++---------- llvm/lib/Transforms/Utils/SizeOpts.cpp | 4 ++++ .../Vectorize/LoopVectorizationLegality.cpp | 7 ++----- 21 files changed, 39 insertions(+), 68 deletions(-) diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 1dc278586f1178b..f8de13650680a89 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -645,11 +645,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // we don't have to split a block. At worst we will be introducing 1 new // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. 
- MachineFunction *MF = MBB1->getParent(); - bool OptForSize = - MF->getFunction().hasOptSize() || - (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) && - llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo)); + bool OptForSize = llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) && + llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo); return EffectiveTailLen >= 2 && OptForSize && (FullBlockTail1 || FullBlockTail2); } diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 67a35901511417f..5224a6c8d1a3738 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -612,7 +612,6 @@ bool CodeGenPrepare::_run(Function &F) { // bypassSlowDivision may create new BBs, but we don't want to reapply the // optimization to those blocks. BasicBlock *Next = BB->getNextNode(); - // F.hasOptSize is already checked in the outer if statement. if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) EverMadeChange |= bypassSlowDivision(BB, BypassWidths); BB = Next; @@ -2608,7 +2607,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) { // cold block. This interacts with our handling for loads and stores to // ensure that we can fold all uses of a potential addressing computation // into their uses. TODO: generalize this to work over profiling data - if (CI->hasFnAttr(Attribute::Cold) && !OptSize && + if (CI->hasFnAttr(Attribute::Cold) && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) for (auto &Arg : CI->args()) { if (!Arg->getType()->isPointerTy()) @@ -5505,9 +5504,7 @@ static bool FindAllMemoryUses( if (CI->hasFnAttr(Attribute::Cold)) { // If this is a cold call, we can sink the addressing calculation into // the cold path. 
See optimizeCallInst - bool OptForSize = - OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); - if (!OptForSize) + if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI)) continue; } @@ -7402,7 +7399,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { SelectKind = TargetLowering::ScalarValSelect; if (TLI->isSelectSupported(SelectKind) && - (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize || + (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()))) return false; diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 6d626de0b4e635e..1de01e402e59e60 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -852,8 +852,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, // available load sizes. const bool IsUsedForZeroCmp = IsBCmp || isOnlyUsedInZeroEqualityComparison(CI); - bool OptForSize = CI->getFunction()->hasOptSize() || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); auto Options = TTI->enableMemCmpExpansion(OptForSize, IsUsedForZeroCmp); if (!Options) return false; diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 722ceea29c951c9..513a49b4fc2e4df 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1621,9 +1621,7 @@ int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool llvm::shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { - const auto &F = MBB.getParent()->getFunction(); - return F.hasOptSize() || F.hasMinSize() || - llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI); + return llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI); } void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, 
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 21a316cf99a217e..a0b6bf445fa8af1 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -890,8 +890,7 @@ float LiveIntervals::getSpillWeight(bool isDef, bool isUse, const auto *MF = MBB->getParent(); // When optimizing for size we only consider the codesize impact of spilling // the register, not the runtime impact. - if (PSI && (MF->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(MF, PSI, MBFI))) + if (PSI && llvm::shouldOptimizeForSize(MF, PSI, MBFI)) return Weight; return Weight * MBFI->getBlockFreqRelativeToEntryBlock(MBB); } diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index dd5220b4599f959..d1dced9ef28dca5 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -2189,9 +2189,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // i.e. when the layout predecessor does not fallthrough to the loop header. // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. - bool OptForSize = F->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get()); - if (OptForSize) + if (llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get())) return L.getHeader(); MachineBasicBlock *OldTop = nullptr; @@ -3511,7 +3509,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { initTailDupThreshold(); const bool OptForSize = - MF.getFunction().hasOptSize() || llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI()); // Determine whether to use ext-tsp for perf/size optimization. 
The method // is beneficial only for instances with at least 3 basic blocks and it can be diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 5bfc1d63ac37640..141cc1f35d66c3c 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -77,9 +77,6 @@ class MachineCombiner : public MachineFunctionPass { TargetSchedModel TSchedModel; - /// True if optimizing for code size. - bool OptSize = false; - public: static char ID; MachineCombiner() : MachineFunctionPass(ID) { @@ -571,7 +568,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { SparseSet RegUnits; RegUnits.setUniverse(TRI->getNumRegUnits()); - bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI); + bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI); bool DoRegPressureReduce = TII->shouldReduceRegisterPressure(MBB, &RegClassInfo); @@ -733,7 +730,6 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { &getAnalysis().getBFI() : nullptr; TraceEnsemble = nullptr; - OptSize = MF.getFunction().hasOptSize(); RegClassInfo.runOnMachineFunction(MF); LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp index 53bed7397d0992e..4d458f2c2e24b4d 100644 --- a/llvm/lib/CodeGen/MachineSizeOpts.cpp +++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -28,6 +28,8 @@ bool llvm::shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *MBFI, PGSOQueryType QueryType) { + if (MF->getFunction().hasOptSize()) + return true; return shouldFuncOptimizeForSizeImpl(MF, PSI, MBFI, QueryType); } @@ -36,6 +38,8 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, const MachineBlockFrequencyInfo *MBFI, PGSOQueryType QueryType) { assert(MBB); + if (MBB->getParent()->getFunction().hasOptSize()) + return true; return shouldOptimizeForSizeImpl(MBB, PSI, 
MBFI, QueryType); } @@ -44,7 +48,9 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, MBFIWrapper *MBFIW, PGSOQueryType QueryType) { assert(MBB); - if (!PSI || !MBFIW) + if (MBB->getParent()->getFunction().hasOptSize()) + return true; + if (!MBFIW) return false; BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB); return shouldOptimizeForSizeImpl(BlockFreq, PSI, &MBFIW->getMBFI(), diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index 61341e1f2d04ce8..55b0eb71ac11fcd 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -431,7 +431,7 @@ PreservedAnalyses SelectOptimizeImpl::run(Function &F, BFI = &FAM.getResult(F); // When optimizing for size, selects are preferable over branches. - if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI)) + if (llvm::shouldOptimizeForSize(&F, PSI, BFI)) return PreservedAnalyses::all(); LI = &FAM.getResult(F); @@ -467,7 +467,7 @@ bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) { TSchedModel.init(TSI); // When optimizing for size, selects are preferable over branches. 
- if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI)) + if (llvm::shouldOptimizeForSize(&F, PSI, BFI)) return false; return optimizeSelects(F); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0770355ec18c0b5..1a86b3b51234d18 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1370,8 +1370,7 @@ SelectionDAG::~SelectionDAG() { } bool SelectionDAG::shouldOptForSize() const { - return MF->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI); + return llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI); } void SelectionDAG::allnodes_clear() { diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index c5fa4e6211a6310..3f2e1511d403a01 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -586,13 +586,11 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; - bool OptForSize = MF->getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI); if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; else MaxDuplicateCount = TailDupSize; - if (OptForSize) + if (llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI)) MaxDuplicateCount = 1; // If the block to be duplicated ends in an unanalyzable fallthrough, don't diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 7a28f7892cbf310..cab0ed23577437f 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1633,7 +1633,6 @@ bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, // performed in findJumpTable() in SelectionDAGBuiler and // getEstimatedNumberOfCaseClusters() in BasicTTIImpl. 
const bool OptForSize = - SI->getParent()->getParent()->hasOptSize() || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI); const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp index a0c91d4e3c3d7eb..fe2c8fff577503e 100644 --- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -443,8 +443,7 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF, // We run after PEI, so we need to AddPristinesAndCSRs. LiveUnits.addLiveOuts(MBB); - OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); + OptForSize = llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); for (MachineInstr &MI : llvm::reverse(MBB)) { if (MachineInstr *NewMI = tryReplaceInstr(&MI, MBB)) diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp index 3172896a8f6092c..280eaf04f23c5ae 100644 --- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp @@ -741,9 +741,7 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { // Remove redundant address calculations. Do it only for -Os/-Oz since only // a code size gain is expected from this part of the pass. 
- bool OptForSize = MF.getFunction().hasOptSize() || - llvm::shouldOptimizeForSize(&MBB, PSI, MBFI); - if (OptForSize) + if (llvm::shouldOptimizeForSize(&MBB, PSI, MBFI)) Changed |= removeRedundantAddrCalc(LEAs); } diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp index bb59cee8badba7f..50d63e196d1d0cd 100644 --- a/llvm/lib/Target/X86/X86PadShortFunction.cpp +++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp @@ -132,9 +132,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock *MBB = ReturnBB.first; unsigned Cycles = ReturnBB.second; - // Function::hasOptSize is already checked above. - bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI); - if (OptForSize) + if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI)) continue; if (Cycles < Threshold) { diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 20249a20a37e413..48971e9a46355c4 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -1001,8 +1001,7 @@ bool FunctionSpecializer::isCandidateFunction(Function *F) { return false; // If we're optimizing the function for size, we shouldn't specialize it. - if (F->hasOptSize() || - shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) + if (shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass)) return false; // Exit if the function is not executable. 
There's no point in specializing diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index 4a6dedc93d30650..9b913e5c2a04a53 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -953,8 +953,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, this->Ctx = &Fn.getContext(); this->Entry = &Entry; this->PSI = PSI; - this->OptForSize = Entry.getParent()->hasOptSize() || - llvm::shouldOptimizeForSize(Entry.getParent(), PSI, BFI, + this->OptForSize = llvm::shouldOptimizeForSize(Entry.getParent(), PSI, BFI, PGSOQueryType::IRPass); // Collect all constant candidates. diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index db82f75bad5f34c..9b4a19106d394b9 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -586,11 +586,8 @@ class LoadEliminationForLoop { } auto *HeaderBB = L->getHeader(); - auto *F = HeaderBB->getParent(); - bool OptForSize = F->hasOptSize() || - llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI, - PGSOQueryType::IRPass); - if (OptForSize) { + if (llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI, + PGSOQueryType::IRPass)) { LLVM_DEBUG( dbgs() << "Versioning is needed but not allowed when optimizing " "for size.\n"); diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 79e91ad097cf00e..d85e0d994660221 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1413,8 +1413,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) { return nullptr; } - bool OptForSize = CI->getFunction()->hasOptSize() || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, 
PGSOQueryType::IRPass); // If the char is variable but the input str and length are not we can turn @@ -3482,10 +3481,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, return B.CreateIntCast(PtrDiff, CI->getType(), false); } - bool OptForSize = CI->getFunction()->hasOptSize() || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, - PGSOQueryType::IRPass); - if (OptForSize) + if (llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass)) return nullptr; Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI); @@ -3795,10 +3792,8 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) { // Don't rewrite fputs to fwrite when optimising for size because fwrite // requires more arguments and thus extra MOVs are required. - bool OptForSize = CI->getFunction()->hasOptSize() || - llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, - PGSOQueryType::IRPass); - if (OptForSize) + if (llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI, + PGSOQueryType::IRPass)) return nullptr; // We can't optimize if return value is used. 
diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp index 09c4c1c3c511ff8..7c95e7e6b996b4f 100644 --- a/llvm/lib/Transforms/Utils/SizeOpts.cpp +++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -99,6 +99,8 @@ struct BasicBlockBFIAdapter { bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, PGSOQueryType QueryType) { + if (F->hasOptSize()) + return true; return shouldFuncOptimizeForSizeImpl(F, PSI, BFI, QueryType); } @@ -106,5 +108,7 @@ bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, PGSOQueryType QueryType) { assert(BB); + if (BB->getParent()->hasOptSize()) + return true; return shouldOptimizeForSizeImpl(BB, PSI, BFI, QueryType); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 43be72f0f34d45d..f1568781252c060 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -460,11 +460,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy, const auto &Strides = LAI ? 
LAI->getSymbolicStrides() : DenseMap(); - Function *F = TheLoop->getHeader()->getParent(); - bool OptForSize = F->hasOptSize() || - llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI, - PGSOQueryType::IRPass); - bool CanAddPredicate = !OptForSize; + bool CanAddPredicate = !llvm::shouldOptimizeForSize( + TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass); int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides, CanAddPredicate, false).value_or(0); if (Stride == 1 || Stride == -1) From 6827a00d4dfe1fa7d479337b6192602744055686 Mon Sep 17 00:00:00 2001 From: SharonXSharon Date: Mon, 28 Oct 2024 09:47:21 -0700 Subject: [PATCH 192/425] [lld][InstrProf] Do not use cstring offset hashes in function order for compression (#113606) --- lld/MachO/BPSectionOrderer.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp index 07b44d48d659328..5db2242a35ef286 100644 --- a/lld/MachO/BPSectionOrderer.cpp +++ b/lld/MachO/BPSectionOrderer.cpp @@ -53,11 +53,8 @@ getRelocHash(const Reloc &reloc, kind = ("Section " + Twine(static_cast(isec->kind()))).str(); if (auto *sym = reloc.referent.dyn_cast()) { kind += (" Symbol " + Twine(static_cast(sym->kind()))).str(); - if (auto *d = dyn_cast(sym)) { - if (isa_and_nonnull(isec)) - return getRelocHash(kind, 0, isec->getOffset(d->value), reloc.addend); + if (auto *d = dyn_cast(sym)) return getRelocHash(kind, sectionIdx.value_or(0), d->value, reloc.addend); - } } return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend); } From af7c58b7ea853ef34462ce97739203e2da3c5894 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Mon, 28 Oct 2024 12:48:42 -0400 Subject: [PATCH 193/425] Remove support for RenderScript (#112916) See https://discourse.llvm.org/t/rfc-deprecate-and-eventually-remove-renderscript-support/81284 for the RFC --- clang/docs/ReleaseNotes.rst | 6 + clang/include/clang/Basic/Attr.td | 9 -- clang/include/clang/Basic/AttrDocs.td 
| 15 -- clang/include/clang/Basic/LangOptions.def | 1 - clang/include/clang/Basic/LangStandard.h | 1 - clang/include/clang/Basic/TargetInfo.h | 6 - clang/include/clang/Driver/Options.td | 5 +- clang/include/clang/Driver/Types.def | 1 - clang/lib/Basic/LangOptions.cpp | 2 - clang/lib/Basic/LangStandards.cpp | 4 - clang/lib/Basic/TargetInfo.cpp | 1 - clang/lib/Basic/Targets.cpp | 6 - clang/lib/Basic/Targets/AArch64.cpp | 16 -- clang/lib/Basic/Targets/AArch64.h | 11 -- clang/lib/Basic/Targets/ARM.cpp | 16 -- clang/lib/Basic/Targets/ARM.h | 11 -- clang/lib/CodeGen/ABIInfoImpl.cpp | 10 -- clang/lib/CodeGen/ABIInfoImpl.h | 17 --- clang/lib/CodeGen/CGDebugInfo.cpp | 2 - clang/lib/CodeGen/Targets/AArch64.cpp | 11 -- clang/lib/CodeGen/Targets/ARM.cpp | 11 -- clang/lib/Driver/Types.cpp | 2 - .../Serialization/SymbolGraphSerializer.cpp | 1 - clang/lib/Frontend/ASTUnit.cpp | 2 - clang/lib/Frontend/CompilerInvocation.cpp | 7 - clang/lib/Frontend/FrontendActions.cpp | 1 - .../CodeGen/debug-info-renderscript-tag.rs | 3 - clang/test/CodeGen/fp16-ops.c | 2 - clang/test/CodeGen/renderscript.c | 140 ------------------ clang/test/Driver/renderscript.rs | 3 - clang/test/Driver/unknown-std.c | 1 - ...a-attribute-supported-attributes-list.test | 1 - .../predefined-macros-no-warnings.c | 2 - clang/test/Sema/renderscript.rs | 25 ---- clang/www/index.html | 4 +- 35 files changed, 10 insertions(+), 346 deletions(-) delete mode 100644 clang/test/CodeGen/debug-info-renderscript-tag.rs delete mode 100644 clang/test/CodeGen/renderscript.c delete mode 100644 clang/test/Driver/renderscript.rs delete mode 100644 clang/test/Sema/renderscript.rs diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 920a2369f964350..9515e96ffd01c1c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -46,6 +46,12 @@ code bases. - The ``clang-rename`` tool has been removed. +- Removed support for RenderScript targets. 
This technology is + `officially deprecated `_ + and users are encouraged to + `migrate to Vulkan `_ + or other options. + C/C++ Language Potentially Breaking Changes ------------------------------------------- diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0259b6e40ca9623..47c93b48175fc85 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -411,7 +411,6 @@ def SYCL : LangOpt<"SYCLIsDevice">; def COnly : LangOpt<"", "!LangOpts.CPlusPlus">; def CPlusPlus : LangOpt<"CPlusPlus">; def OpenCL : LangOpt<"OpenCL">; -def RenderScript : LangOpt<"RenderScript">; def ObjC : LangOpt<"ObjC">; def BlocksSupported : LangOpt<"Blocks">; def ObjCAutoRefCount : LangOpt<"ObjCAutoRefCount">; @@ -1629,14 +1628,6 @@ def OpenCLNoSVM : Attr { let ASTNode = 0; } -def RenderScriptKernel : Attr { - let Spellings = [GNU<"kernel">]; - let Subjects = SubjectList<[Function]>; - let Documentation = [RenderScriptKernelAttributeDocs]; - let LangOpts = [RenderScript]; - let SimpleHandler = 1; -} - def Deprecated : InheritableAttr { let Spellings = [GCC<"deprecated">, Declspec<"deprecated">, CXX11<"","deprecated", 201309>, diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index ee8126cadae2322..7a130c434e73ced 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -5831,21 +5831,6 @@ provided with the regular ``visibility`` attribute. }]; } -def RenderScriptKernelAttributeDocs : Documentation { - let Category = DocCatFunction; - let Content = [{ -``__attribute__((kernel))`` is used to mark a ``kernel`` function in -RenderScript. - -In RenderScript, ``kernel`` functions are used to express data-parallel -computations. The RenderScript runtime efficiently parallelizes ``kernel`` -functions to run on computational resources such as multi-core CPUs and GPUs. -See the RenderScript_ documentation for more information. - -.. 
_RenderScript: https://developer.android.com/guide/topics/renderscript/compute.html - }]; -} - def XRayDocs : Documentation { let Category = DocCatFunction; let Heading = "xray_always_instrument, xray_never_instrument, xray_log_args"; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 68db400c22e6c1c..942fc557c5b9496 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -280,7 +280,6 @@ LANGOPT(OpenMPNoNestedParallelism , 1, 0, "Assume that no thread in a parallel LANGOPT(OpenMPOffloadMandatory , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.") LANGOPT(OpenMPForceUSM , 1, 0, "Enable OpenMP unified shared memory mode via compiler.") LANGOPT(NoGPULib , 1, 0, "Indicate a build without the standard GPU libraries.") -LANGOPT(RenderScript , 1, 0, "RenderScript") LANGOPT(HLSL, 1, 0, "HLSL") ENUM_LANGOPT(HLSLVersion, HLSLLangStd, 16, HLSL_Unset, "HLSL Version") diff --git a/clang/include/clang/Basic/LangStandard.h b/clang/include/clang/Basic/LangStandard.h index 56a0d2c95e2b191..49412232c9c5edd 100644 --- a/clang/include/clang/Basic/LangStandard.h +++ b/clang/include/clang/Basic/LangStandard.h @@ -39,7 +39,6 @@ enum class Language : uint8_t { OpenCL, OpenCLCXX, CUDA, - RenderScript, HIP, HLSL, ///@} diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 17262d5968b12d4..25eda907d20a7bf 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -262,9 +262,6 @@ class TargetInfo : public TransferrableTargetInfo, LLVM_PREFERRED_TYPE(bool) unsigned HasBuiltinMSVaList : 1; - LLVM_PREFERRED_TYPE(bool) - unsigned IsRenderScriptTarget : 1; - LLVM_PREFERRED_TYPE(bool) unsigned HasAArch64SVETypes : 1; @@ -1031,9 +1028,6 @@ class TargetInfo : public TransferrableTargetInfo, /// available on this target. 
bool hasBuiltinMSVaList() const { return HasBuiltinMSVaList; } - /// Returns true for RenderScript. - bool isRenderScriptTarget() const { return IsRenderScriptTarget; } - /// Returns whether or not the AArch64 SVE built-in types are /// available on this target. bool hasAArch64SVETypes() const { return HasAArch64SVETypes; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f86e90e14c477ba..69ae0553507bd3a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -627,7 +627,6 @@ defvar c23 = LangOpts<"C23">; defvar lang_std = LangOpts<"LangStd">; defvar open_cl = LangOpts<"OpenCL">; defvar cuda = LangOpts<"CUDA">; -defvar render_script = LangOpts<"RenderScript">; defvar hip = LangOpts<"HIP">; defvar gnu_mode = LangOpts<"GNUMode">; defvar asm_preprocessor = LangOpts<"AsmPreprocessor">; @@ -8118,11 +8117,11 @@ def vtordisp_mode_EQ : Joined<["-"], "vtordisp-mode=">, def fnative_half_type: Flag<["-"], "fnative-half-type">, HelpText<"Use the native half type for __fp16 instead of promoting to float">, MarshallingInfoFlag>, - ImpliedByAnyOf<[open_cl.KeyPath, render_script.KeyPath]>; + ImpliedByAnyOf<[open_cl.KeyPath]>; def fnative_half_arguments_and_returns : Flag<["-"], "fnative-half-arguments-and-returns">, HelpText<"Use the native __fp16 type for arguments and returns (and skip ABI-specific lowering)">, MarshallingInfoFlag>, - ImpliedByAnyOf<[open_cl.KeyPath, render_script.KeyPath, hlsl.KeyPath, hip.KeyPath]>; + ImpliedByAnyOf<[open_cl.KeyPath, hlsl.KeyPath, hip.KeyPath]>; def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">, HelpText<"Set default calling convention">, Values<"cdecl,fastcall,stdcall,vectorcall,regcall,rtdcall">, diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def index af186c5df69201b..214c5e7a789f97e 100644 --- a/clang/include/clang/Driver/Types.def +++ b/clang/include/clang/Driver/Types.def @@ -55,7 +55,6 @@ 
TYPE("c++", CXX, PP_CXX, "cpp", phases TYPE("objective-c++-cpp-output", PP_ObjCXX, INVALID, "mii", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("objc++-cpp-output", PP_ObjCXX_Alias, INVALID, "mii", phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("objective-c++", ObjCXX, PP_ObjCXX, "mm", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) -TYPE("renderscript", RenderScript, PP_C, "rs", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) TYPE("hlsl", HLSL, PP_CXX, "hlsl", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble) // C family input files to precompile. diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index da3216ae03af2e6..94caf6a3897bc1c 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -203,8 +203,6 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang, Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); } - Opts.RenderScript = Lang == Language::RenderScript; - // OpenCL, C++ and C23 have bool, true, false keywords. 
Opts.Bool = Opts.OpenCL || Opts.CPlusPlus || Opts.C23; diff --git a/clang/lib/Basic/LangStandards.cpp b/clang/lib/Basic/LangStandards.cpp index 214567a53efe95a..c49d095018b2002 100644 --- a/clang/lib/Basic/LangStandards.cpp +++ b/clang/lib/Basic/LangStandards.cpp @@ -37,8 +37,6 @@ StringRef clang::languageToString(Language L) { return "OpenCLC++"; case Language::CUDA: return "CUDA"; - case Language::RenderScript: - return "RenderScript"; case Language::HIP: return "HIP"; case Language::HLSL: @@ -114,8 +112,6 @@ LangStandard::Kind clang::getDefaultLanguageStandard(clang::Language Lang, case Language::CUDA: case Language::HIP: return LangStandard::lang_gnucxx17; - case Language::RenderScript: - return LangStandard::lang_c99; case Language::HLSL: return LangStandard::lang_hlsl202x; } diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp index 145ca545854da7d..86befb1cbc74fc8 100644 --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -154,7 +154,6 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) { SSERegParmMax = 0; HasAlignMac68kSupport = false; HasBuiltinMSVaList = false; - IsRenderScriptTarget = false; HasAArch64SVETypes = false; HasRISCVVTypes = false; AllowAMDGPUUnsafeFPAtomics = false; diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 4917ef015941be4..0021d33c45d7c9b 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -710,12 +710,6 @@ std::unique_ptr AllocateTarget(const llvm::Triple &Triple, case llvm::Triple::dxil: return std::make_unique(Triple, Opts); - case llvm::Triple::renderscript32: - return std::make_unique>(Triple, - Opts); - case llvm::Triple::renderscript64: - return std::make_unique>(Triple, - Opts); case llvm::Triple::ve: return std::make_unique>(Triple, Opts); diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 3dbba2b4d25bd6c..a0f94d5d3154807 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ 
b/clang/lib/Basic/Targets/AArch64.cpp @@ -1723,19 +1723,3 @@ TargetInfo::BuiltinVaListKind DarwinAArch64TargetInfo::getBuiltinVaListKind() const { return TargetInfo::CharPtrBuiltinVaList; } - -// 64-bit RenderScript is aarch64 -RenderScript64TargetInfo::RenderScript64TargetInfo(const llvm::Triple &Triple, - const TargetOptions &Opts) - : AArch64leTargetInfo(llvm::Triple("aarch64", Triple.getVendorName(), - Triple.getOSName(), - Triple.getEnvironmentName()), - Opts) { - IsRenderScriptTarget = true; -} - -void RenderScript64TargetInfo::getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const { - Builder.defineMacro("__RENDERSCRIPT__"); - AArch64leTargetInfo::getTargetDefines(Opts, Builder); -} diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 16a02e102e045d6..ea3e4015d842653 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -319,17 +319,6 @@ class LLVM_LIBRARY_VISIBILITY DarwinAArch64TargetInfo MacroBuilder &Builder) const override; }; -// 64-bit RenderScript is aarch64 -class LLVM_LIBRARY_VISIBILITY RenderScript64TargetInfo - : public AArch64leTargetInfo { -public: - RenderScript64TargetInfo(const llvm::Triple &Triple, - const TargetOptions &Opts); - - void getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const override; -}; - } // namespace targets } // namespace clang diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index c87300bf2d60e04..370444057b42981 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -1498,19 +1498,3 @@ void DarwinARMTargetInfo::getOSDefines(const LangOptions &Opts, MacroBuilder &Builder) const { getDarwinDefines(Builder, Opts, Triple, PlatformName, PlatformMinVersion); } - -RenderScript32TargetInfo::RenderScript32TargetInfo(const llvm::Triple &Triple, - const TargetOptions &Opts) - : ARMleTargetInfo(llvm::Triple("armv7", Triple.getVendorName(), - Triple.getOSName(), - 
Triple.getEnvironmentName()), - Opts) { - IsRenderScriptTarget = true; - LongWidth = LongAlign = 64; -} - -void RenderScript32TargetInfo::getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const { - Builder.defineMacro("__RENDERSCRIPT__"); - ARMleTargetInfo::getTargetDefines(Opts, Builder); -} diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index df9855a52e61c0f..55ecb99d82d8fb1 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -310,17 +310,6 @@ class LLVM_LIBRARY_VISIBILITY DarwinARMTargetInfo DarwinARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts); }; -// 32-bit RenderScript is armv7 with width and align of 'long' set to 8-bytes -class LLVM_LIBRARY_VISIBILITY RenderScript32TargetInfo - : public ARMleTargetInfo { -public: - RenderScript32TargetInfo(const llvm::Triple &Triple, - const TargetOptions &Opts); - - void getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const override; -}; - } // namespace targets } // namespace clang diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp index be91b85e3a816f8..79300df15d0e29f 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.cpp +++ b/clang/lib/CodeGen/ABIInfoImpl.cpp @@ -80,16 +80,6 @@ RValue DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, Slot); } -ABIArgInfo CodeGen::coerceToIntArray(QualType Ty, ASTContext &Context, - llvm::LLVMContext &LLVMContext) { - // Alignment and Size are measured in bits. 
- const uint64_t Size = Context.getTypeSize(Ty); - const uint64_t Alignment = Context.getTypeAlign(Ty); - llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment); - const uint64_t NumElements = (Size + Alignment - 1) / Alignment; - return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements)); -} - void CodeGen::AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array, llvm::Value *Value, unsigned FirstIndex, unsigned LastIndex) { diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h index 2a3ef6b8a6c9610..d9d79c6a55ddb16 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.h +++ b/clang/lib/CodeGen/ABIInfoImpl.h @@ -33,23 +33,6 @@ class DefaultABIInfo : public ABIInfo { AggValueSlot Slot) const override; }; -// Helper for coercing an aggregate argument or return value into an integer -// array of the same size (including padding) and alignment. This alternate -// coercion happens only for the RenderScript ABI and can be removed after -// runtimes that rely on it are no longer supported. -// -// RenderScript assumes that the size of the argument / return value in the IR -// is the same as the size of the corresponding qualified type. This helper -// coerces the aggregate type into an array of the same size (including -// padding). This coercion is used in lieu of expansion of struct members or -// other canonical coercions that return a coerced-type of larger size. 
-// -// Ty - The argument / return value type -// Context - The associated ASTContext -// LLVMContext - The associated LLVMContext -ABIArgInfo coerceToIntArray(QualType Ty, ASTContext &Context, - llvm::LLVMContext &LLVMContext); - void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array, llvm::Value *Value, unsigned FirstIndex, unsigned LastIndex); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 5fd6cfa63e6efab..ad64abe7cd40a39 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -624,8 +624,6 @@ void CGDebugInfo::CreateCompileUnit() { } else if (LO.OpenCL && (!CGM.getCodeGenOpts().DebugStrictDwarf || CGM.getCodeGenOpts().DwarfVersion >= 5)) { LangTag = llvm::dwarf::DW_LANG_OpenCL; - } else if (LO.RenderScript) { - LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript; } else if (LO.C11 && !(CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)) { LangTag = llvm::dwarf::DW_LANG_C11; } else if (LO.C99) { diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index a80411971b60c39..9320c6ef06efab0 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -462,11 +462,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn, // Aggregates <= 16 bytes are passed directly in registers or on the stack. if (Size <= 128) { - // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of - // same size and alignment. - if (getTarget().isRenderScriptTarget()) { - return coerceToIntArray(Ty, getContext(), getVMContext()); - } unsigned Alignment; if (Kind == AArch64ABIKind::AAPCS) { Alignment = getContext().getTypeUnadjustedAlign(Ty); @@ -548,12 +543,6 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy, // Aggregates <= 16 bytes are returned directly in registers or on the stack. 
if (Size <= 128) { - // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of - // same size and alignment. - if (getTarget().isRenderScriptTarget()) { - return coerceToIntArray(RetTy, getContext(), getVMContext()); - } - if (Size <= 64 && getDataLayout().isLittleEndian()) { // Composite types are returned in lower bits of a 64-bit register for LE, // and in higher bits for BE. However, integer types are always returned diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp index 49ac1a76e767aa0..2d858fa2f3c3a35 100644 --- a/clang/lib/CodeGen/Targets/ARM.cpp +++ b/clang/lib/CodeGen/Targets/ARM.cpp @@ -420,12 +420,6 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, /*Realign=*/TyAlign > ABIAlign); } - // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of - // same size and alignment. - if (getTarget().isRenderScriptTarget()) { - return coerceToIntArray(Ty, getContext(), getVMContext()); - } - // Otherwise, pass by coercing to a structure of the appropriate size. llvm::Type* ElemTy; unsigned SizeRegs; @@ -609,11 +603,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic, // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 32) { - // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of - // same size and alignment. 
- if (getTarget().isRenderScriptTarget()) { - return coerceToIntArray(RetTy, getContext(), getVMContext()); - } if (getDataLayout().isBigEndian()) // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp index 3de45b00b4d00fd..b14aac0f0ce015e 100644 --- a/clang/lib/Driver/Types.cpp +++ b/clang/lib/Driver/Types.cpp @@ -201,7 +201,6 @@ bool types::isDerivedFromC(ID Id) { case TY_PP_ObjCXX: case TY_PP_ObjCXX_Alias: case TY_ObjCXX: - case TY_RenderScript: case TY_PP_CHeader: case TY_CHeader: case TY_CLHeader: @@ -328,7 +327,6 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) { .Case("ll", TY_LLVM_IR) .Case("mi", TY_PP_ObjC) .Case("mm", TY_ObjCXX) - .Case("rs", TY_RenderScript) .Case("adb", TY_Ada) .Case("ads", TY_Ada) .Case("asm", TY_PP_Asm) diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 030509d37875950..c730c062b6a1d56 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -213,7 +213,6 @@ StringRef getLanguageName(Language Lang) { case Language::OpenCL: case Language::OpenCLCXX: case Language::CUDA: - case Language::RenderScript: case Language::HIP: case Language::HLSL: diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp index bffff0d27af3ab6..4aec928f9eb0a5b 100644 --- a/clang/lib/Frontend/ASTUnit.cpp +++ b/clang/lib/Frontend/ASTUnit.cpp @@ -2699,8 +2699,6 @@ InputKind ASTUnit::getInputKind() const { Lang = Language::OpenCL; else if (LangOpts.CUDA) Lang = Language::CUDA; - else if (LangOpts.RenderScript) - Lang = Language::RenderScript; else if (LangOpts.CPlusPlus) Lang = LangOpts.ObjC ? 
Language::ObjCXX : Language::CXX; else diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index db7c791059a32ea..d8261e12b08b5c8 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2846,9 +2846,6 @@ static void GenerateFrontendArgs(const FrontendOptions &Opts, case Language::ObjCXX: Lang = "objective-c++"; break; - case Language::RenderScript: - Lang = "renderscript"; - break; case Language::Asm: Lang = "assembler-with-cpp"; break; @@ -3071,7 +3068,6 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, .Case("c++", Language::CXX) .Case("objective-c", Language::ObjC) .Case("objective-c++", Language::ObjCXX) - .Case("renderscript", Language::RenderScript) .Case("hlsl", Language::HLSL) .Default(Language::Unknown); @@ -3499,7 +3495,6 @@ static bool IsInputCompatibleWithStandard(InputKind IK, case Language::C: case Language::ObjC: - case Language::RenderScript: return S.getLanguage() == Language::C; case Language::OpenCL: @@ -3551,8 +3546,6 @@ static StringRef GetInputKindName(InputKind IK) { return "C++ for OpenCL"; case Language::CUDA: return "CUDA"; - case Language::RenderScript: - return "RenderScript"; case Language::HIP: return "HIP"; diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 64f90c493c1055d..e943f143d4c1588 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -1108,7 +1108,6 @@ void PrintPreambleAction::ExecuteAction() { case Language::Unknown: case Language::Asm: case Language::LLVM_IR: - case Language::RenderScript: // We can't do anything with these. 
return; } diff --git a/clang/test/CodeGen/debug-info-renderscript-tag.rs b/clang/test/CodeGen/debug-info-renderscript-tag.rs deleted file mode 100644 index ded650d9660b8b4..000000000000000 --- a/clang/test/CodeGen/debug-info-renderscript-tag.rs +++ /dev/null @@ -1,3 +0,0 @@ -// RUN: %clang -emit-llvm -S -g %s -o - | FileCheck %s - -// CHECK: !DICompileUnit(language: DW_LANG_GOOGLE_RenderScript{{.*}}) diff --git a/clang/test/CodeGen/fp16-ops.c b/clang/test/CodeGen/fp16-ops.c index bfa2a2f7f6c8267..4c206690a7518e8 100644 --- a/clang/test/CodeGen/fp16-ops.c +++ b/clang/test/CodeGen/fp16-ops.c @@ -6,8 +6,6 @@ // RUN: | FileCheck %s --check-prefix=NATIVE-HALF // RUN: %clang_cc1 -emit-llvm -o - -triple aarch64 -fnative-half-type %s \ // RUN: | FileCheck %s --check-prefix=NATIVE-HALF -// RUN: %clang_cc1 -emit-llvm -o - -x renderscript %s \ -// RUN: | FileCheck %s --check-prefix=NATIVE-HALF typedef unsigned cond_t; typedef __fp16 float16_t; diff --git a/clang/test/CodeGen/renderscript.c b/clang/test/CodeGen/renderscript.c deleted file mode 100644 index 1629665c1ffb874..000000000000000 --- a/clang/test/CodeGen/renderscript.c +++ /dev/null @@ -1,140 +0,0 @@ -// RUN: %clang_cc1 %s -triple=renderscript32-none-linux-gnueabi -emit-llvm -o - -Werror | FileCheck %s -check-prefix=CHECK-RS32 -// RUN: %clang_cc1 %s -triple=renderscript64-none-linux-android -emit-llvm -o - -Werror | FileCheck %s -check-prefix=CHECK-RS64 -// RUN: %clang_cc1 %s -triple=armv7-none-linux-gnueabi -emit-llvm -o - -Werror | FileCheck %s -check-prefix=CHECK-ARM - -// Ensure that the bitcode has the correct triple -// CHECK-RS32: target triple = "armv7-none-linux-gnueabi" -// CHECK-RS64: target triple = "aarch64-none-linux-android" -// CHECK-ARM: target triple = "armv7-none-linux-gnueabi" - -// Ensure that long data type has 8-byte size and alignment in RenderScript -#ifdef __RENDERSCRIPT__ -#define LONG_WIDTH_AND_ALIGN 8 -#else -#define LONG_WIDTH_AND_ALIGN 4 -#endif - -_Static_assert(sizeof(long) == 
LONG_WIDTH_AND_ALIGN, "sizeof long is wrong"); -_Static_assert(_Alignof(long) == LONG_WIDTH_AND_ALIGN, "sizeof long is wrong"); - -// CHECK-RS32: i64 @test_long(i64 noundef %v) -// CHECK-RS64: i64 @test_long(i64 noundef %v) -// CHECK-ARM: i32 @test_long(i32 noundef %v) -long test_long(long v) { - return v + 1; -} - -// ============================================================================= -// Test coercion of aggregate argument or return value into integer arrays -// ============================================================================= - -// ============================================================================= -// aggregate parameter <= 4 bytes: coerced to [a x iNN] for both 32-bit and -// 64-bit RenderScript -// ============================================================================== - -typedef struct {char c1, c2, c3; } sChar3; -typedef struct {short s; char c;} sShortChar; - -// CHECK-RS32: void @argChar3([3 x i8] %s.coerce) -// CHECK-RS64: void @argChar3([3 x i8] %s.coerce) -void argChar3(sChar3 s) {} - -// CHECK-RS32: void @argShortChar([2 x i16] %s.coerce) -// CHECK-RS64: void @argShortChar([2 x i16] %s.coerce) -void argShortChar(sShortChar s) {} - -// ============================================================================= -// aggregate return value <= 4 bytes: coerced to [a x iNN] for both 32-bit and -// 64-bit RenderScript -// ============================================================================= - -// CHECK-RS32: [3 x i8] @retChar3() -// CHECK-RS64: [3 x i8] @retChar3() -sChar3 retChar3(void) { sChar3 r; return r; } - -// CHECK-RS32: [2 x i16] @retShortChar() -// CHECK-RS64: [2 x i16] @retShortChar() -sShortChar retShortChar(void) { sShortChar r; return r; } - -// ============================================================================= -// aggregate parameter <= 16 bytes: coerced to [a x iNN] for both 32-bit and -// 64-bit RenderScript -// 
============================================================================= - -typedef struct {short s1; char c; short s2; } sShortCharShort; -typedef struct {int i; short s; char c; } sIntShortChar; -typedef struct {long l; int i; } sLongInt; - -// CHECK-RS32: void @argShortCharShort([3 x i16] %s.coerce) -// CHECK-RS64: void @argShortCharShort([3 x i16] %s.coerce) -void argShortCharShort(sShortCharShort s) {} - -// CHECK-RS32: void @argIntShortChar([2 x i32] %s.coerce) -// CHECK-RS64: void @argIntShortChar([2 x i32] %s.coerce) -void argIntShortChar(sIntShortChar s) {} - -// CHECK-RS32: void @argLongInt([2 x i64] %s.coerce) -// CHECK-RS64: void @argLongInt([2 x i64] %s.coerce) -void argLongInt(sLongInt s) {} - -// ============================================================================= -// aggregate return value <= 16 bytes: returned on stack for 32-bit RenderScript -// and coerced to [a x iNN] for 64-bit RenderScript -// ============================================================================= - -// CHECK-RS32: void @retShortCharShort(ptr dead_on_unwind noalias writable sret(%struct.sShortCharShort) align 2 %agg.result) -// CHECK-RS64: [3 x i16] @retShortCharShort() -sShortCharShort retShortCharShort(void) { sShortCharShort r; return r; } - -// CHECK-RS32: void @retIntShortChar(ptr dead_on_unwind noalias writable sret(%struct.sIntShortChar) align 4 %agg.result) -// CHECK-RS64: [2 x i32] @retIntShortChar() -sIntShortChar retIntShortChar(void) { sIntShortChar r; return r; } - -// CHECK-RS32: void @retLongInt(ptr dead_on_unwind noalias writable sret(%struct.sLongInt) align 8 %agg.result) -// CHECK-RS64: [2 x i64] @retLongInt() -sLongInt retLongInt(void) { sLongInt r; return r; } - -// ============================================================================= -// aggregate parameter <= 64 bytes: coerced to [a x iNN] for 32-bit RenderScript -// and passed on the stack for 64-bit RenderScript -// 
============================================================================= - -typedef struct {int i1, i2, i3, i4, i5; } sInt5; -typedef struct {long l1, l2; char c; } sLong2Char; - -// CHECK-RS32: void @argInt5([5 x i32] %s.coerce) -// CHECK-RS64: void @argInt5(ptr noundef %s) -void argInt5(sInt5 s) {} - -// CHECK-RS32: void @argLong2Char([3 x i64] %s.coerce) -// CHECK-RS64: void @argLong2Char(ptr noundef %s) -void argLong2Char(sLong2Char s) {} - -// ============================================================================= -// aggregate return value <= 64 bytes: returned on stack for both 32-bit and -// 64-bit RenderScript -// ============================================================================= - -// CHECK-RS32: void @retInt5(ptr dead_on_unwind noalias writable sret(%struct.sInt5) align 4 %agg.result) -// CHECK-RS64: void @retInt5(ptr dead_on_unwind noalias writable sret(%struct.sInt5) align 4 %agg.result) -sInt5 retInt5(void) { sInt5 r; return r;} - -// CHECK-RS32: void @retLong2Char(ptr dead_on_unwind noalias writable sret(%struct.sLong2Char) align 8 %agg.result) -// CHECK-RS64: void @retLong2Char(ptr dead_on_unwind noalias writable sret(%struct.sLong2Char) align 8 %agg.result) -sLong2Char retLong2Char(void) { sLong2Char r; return r;} - -// ============================================================================= -// aggregate parameters and return values > 64 bytes: passed and returned on the -// stack for both 32-bit and 64-bit RenderScript -// ============================================================================= - -typedef struct {long l1, l2, l3, l4, l5, l6, l7, l8, l9; } sLong9; - -// CHECK-RS32: void @argLong9(ptr noundef byval(%struct.sLong9) align 8 %s) -// CHECK-RS64: void @argLong9(ptr noundef %s) -void argLong9(sLong9 s) {} - -// CHECK-RS32: void @retLong9(ptr dead_on_unwind noalias writable sret(%struct.sLong9) align 8 %agg.result) -// CHECK-RS64: void @retLong9(ptr dead_on_unwind noalias writable sret(%struct.sLong9) align 
8 %agg.result) -sLong9 retLong9(void) { sLong9 r; return r; } diff --git a/clang/test/Driver/renderscript.rs b/clang/test/Driver/renderscript.rs deleted file mode 100644 index 84f5dc4de777a2e..000000000000000 --- a/clang/test/Driver/renderscript.rs +++ /dev/null @@ -1,3 +0,0 @@ -// RUN: %clang -### 2>&1 %s | FileCheck %s - -// CHECK: "-x" "renderscript" diff --git a/clang/test/Driver/unknown-std.c b/clang/test/Driver/unknown-std.c index 8f9047b2230adb5..332d587ddd4a17a 100644 --- a/clang/test/Driver/unknown-std.c +++ b/clang/test/Driver/unknown-std.c @@ -4,7 +4,6 @@ // RUN: not %clang %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s // RUN: not %clang -x objective-c %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s -// RUN: not %clang -x renderscript %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s // CHECK: error: invalid value 'foobar' in '-std=foobar' // CHECK-NEXT: note: use 'c89', 'c90', or 'iso9899:1990' for 'ISO C 1990' standard diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 914f94c08a9fd98..e28b0775410c0a5 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -173,7 +173,6 @@ // CHECK-NEXT: RandomizeLayout (SubjectMatchRule_record) // CHECK-NEXT: ReadOnlyPlacement (SubjectMatchRule_record) // CHECK-NEXT: ReleaseHandle (SubjectMatchRule_variable_is_parameter) -// CHECK-NEXT: RenderScriptKernel (SubjectMatchRule_function) // CHECK-NEXT: ReqdWorkGroupSize (SubjectMatchRule_function) // CHECK-NEXT: Restrict (SubjectMatchRule_function) // CHECK-NEXT: ReturnTypestate (SubjectMatchRule_function, SubjectMatchRule_variable_is_parameter) diff --git a/clang/test/Preprocessor/predefined-macros-no-warnings.c b/clang/test/Preprocessor/predefined-macros-no-warnings.c index d44b99a2b192a16..4e3e29ccfa8a83f 100644 --- 
a/clang/test/Preprocessor/predefined-macros-no-warnings.c +++ b/clang/test/Preprocessor/predefined-macros-no-warnings.c @@ -179,8 +179,6 @@ // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple wasm64-wasi // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple wasm64-emscripten // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple dxil -// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple renderscript32 -// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple renderscript64 // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple ve // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple csky // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple csky-linux diff --git a/clang/test/Sema/renderscript.rs b/clang/test/Sema/renderscript.rs deleted file mode 100644 index 8fa0d4389e7f1f8..000000000000000 --- a/clang/test/Sema/renderscript.rs +++ /dev/null @@ -1,25 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -x renderscript -D__RENDERSCRIPT__ %s -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -x c %s -// REQUIRES: x86-registered-target - -#ifndef __RENDERSCRIPT__ -// expected-warning@+2 {{'kernel' attribute ignored}} -#endif -void __attribute__((kernel)) kernel(void) {} - -#ifndef __RENDERSCRIPT__ -// expected-warning@+4 {{'kernel' attribute ignored}} -#else -// expected-warning@+2 {{'kernel' attribute only applies to functions}} -#endif -int __attribute__((kernel)) global; - -#ifndef __RENDERSCRIPT__ -// expected-error@+2 {{function return value cannot have __fp16 type; did you forget * ?}} -#endif -__fp16 fp16_return(void); - -#ifndef __RENDERSCRIPT__ -// expected-error@+2 {{parameters cannot have __fp16 type; did you forget * ?}} -#endif -void fp16_arg(__fp16 p); diff --git a/clang/www/index.html b/clang/www/index.html index 95bbfa86172ba40..465a6a6d29dc765 100755 --- a/clang/www/index.html +++ b/clang/www/index.html @@ -16,8 +16,8 @@

Clang: a C language family frontend for LLVM

The Clang project provides a language front-end and tooling infrastructure - for languages in the C language family (C, C++, Objective C/C++, OpenCL, - CUDA, and RenderScript) for the LLVM + for languages in the C language family (C, C++, Objective C/C++, OpenCL, and + CUDA) for the LLVM project. Both a GCC-compatible compiler driver (clang) and an MSVC-compatible compiler driver (clang-cl.exe) are provided. You can get and build the source today.

From f14743794587db102c6d1b20f9c87a1ac20decfd Mon Sep 17 00:00:00 2001 From: jimingham Date: Mon, 28 Oct 2024 10:01:57 -0700 Subject: [PATCH 194/425] Add the ability to break on call-site locations, improve inline stepping (#112939) Previously lldb didn't support setting breakpoints on call site locations. This patch adds that ability. It would be very slow if we did this by searching all the debug information for every inlined subroutine record looking for a call-site match, so I added one restriction to the call-site support. This change will find all call sites for functions that also supply at least one line to the regular line table. That way we can use the fact that the line table search will move the location to that subsequent line (but only within the same function). When we find an actually moved source line match, we can search in the function that contained that line table entry for the call-site, and set the breakpoint location back to that. When I started writing tests for this new ability, it quickly became obvious that our support for virtual inline stepping was pretty buggy. We didn't print the right file & line number for the breakpoint, and we didn't set the position in the "virtual inlined stack" correctly when we hit the breakpoint. We also didn't step through the inlined frames correctly. There was code to try to detect the right inlined stack position, but it had been refactored a while back with the comment that it was super confusing and the refactor was supposed to make it clearer, but the refactor didn't work either. That code was made much clearer by abstracting the job of "handling the stack readjustment" to the various StopInfo's. Previously, there was a big (and buggy) switch over stop info's. Moving the responsibility to the stop info made this code much easier to reason about. 
We also had no tests for virtual inlined stepping (our inlined stepping test was actually written specifically to avoid the formation of a virtual inlined stack). So I also added tests for that along with the tests for setting the call-site breakpoints. --- .../lldb/Breakpoint/BreakpointLocation.h | 36 ++++ lldb/include/lldb/Breakpoint/BreakpointSite.h | 5 + lldb/include/lldb/Core/Declaration.h | 6 +- lldb/include/lldb/Target/StopInfo.h | 12 ++ .../lldb/Target/ThreadPlanStepInRange.h | 4 +- lldb/source/Breakpoint/BreakpointLocation.cpp | 63 ++++++- lldb/source/Breakpoint/BreakpointResolver.cpp | 15 ++ lldb/source/Breakpoint/BreakpointSite.cpp | 17 ++ lldb/source/Core/Declaration.cpp | 5 +- lldb/source/Symbol/Block.cpp | 2 +- lldb/source/Symbol/CompileUnit.cpp | 111 +++++++++++- lldb/source/Target/StackFrameList.cpp | 171 ++++++------------ lldb/source/Target/StopInfo.cpp | 55 ++++++ lldb/source/Target/Thread.cpp | 8 + lldb/source/Target/ThreadPlanStepInRange.cpp | 24 ++- .../source/Target/ThreadPlanStepOverRange.cpp | 2 +- .../inline-stepping/TestInlineStepping.py | 63 +++++++ .../inline-stepping/calling.cpp | 25 +++ 18 files changed, 493 insertions(+), 131 deletions(-) diff --git a/lldb/include/lldb/Breakpoint/BreakpointLocation.h b/lldb/include/lldb/Breakpoint/BreakpointLocation.h index cca00335bc3c67d..3592291bb2d06e9 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointLocation.h +++ b/lldb/include/lldb/Breakpoint/BreakpointLocation.h @@ -11,10 +11,12 @@ #include #include +#include #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Breakpoint/StoppointHitCounter.h" #include "lldb/Core/Address.h" +#include "lldb/Symbol/LineEntry.h" #include "lldb/Utility/UserID.h" #include "lldb/lldb-private.h" @@ -282,6 +284,25 @@ class BreakpointLocation /// Returns the breakpoint location ID. lldb::break_id_t GetID() const { return m_loc_id; } + /// Set the line entry that should be shown to users for this location.
+ /// It is up to the caller to verify that this is a valid entry to show. + /// The current use of this is to distinguish among line entries from a + /// virtual inlined call stack that all share the same address. + /// The line entry must have the same start address as the address for this + /// location. + bool SetPreferredLineEntry(const LineEntry &line_entry) { + if (m_address == line_entry.range.GetBaseAddress()) { + m_preferred_line_entry = line_entry; + return true; + } + assert(0 && "Tried to set a preferred line entry with a different address"); + return false; + } + + const std::optional GetPreferredLineEntry() { + return m_preferred_line_entry; + } + protected: friend class BreakpointSite; friend class BreakpointLocationList; @@ -306,6 +327,16 @@ class BreakpointLocation /// If it returns false we should continue, otherwise stop. bool IgnoreCountShouldStop(); + /// If this location knows that the virtual stack frame it represents is + /// not frame 0, return the suggested stack frame instead. This will happen + /// when the location's address contains a "virtual inlined call stack" and + /// the breakpoint was set on a file & line that are not at the bottom of that + /// stack. For now we key off the "preferred line entry" - looking for that + /// in the blocks that start with the stop PC. + /// This version of the API doesn't take an "inlined" parameter because it + /// only changes frames in the inline stack. + std::optional GetSuggestedStackFrameIndex(); + private: void SwapLocation(lldb::BreakpointLocationSP swap_from); @@ -369,6 +400,11 @@ class BreakpointLocation lldb::break_id_t m_loc_id; ///< Breakpoint location ID. StoppointHitCounter m_hit_counter; ///< Number of times this breakpoint /// location has been hit. + /// If this exists, use it to print the stop description rather than the + /// LineEntry m_address resolves to directly. 
Use this for instance when the + /// location was given somewhere in the virtual inlined call stack since the + /// Address always resolves to the lowest entry in the stack. + std::optional m_preferred_line_entry; void SetShouldResolveIndirectFunctions(bool do_resolve) { m_should_resolve_indirect_functions = do_resolve; diff --git a/lldb/include/lldb/Breakpoint/BreakpointSite.h b/lldb/include/lldb/Breakpoint/BreakpointSite.h index 17b76d51c1ae53a..7b3f7be23639f27 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointSite.h +++ b/lldb/include/lldb/Breakpoint/BreakpointSite.h @@ -170,6 +170,11 @@ class BreakpointSite : public std::enable_shared_from_this, /// \see lldb::DescriptionLevel void GetDescription(Stream *s, lldb::DescriptionLevel level); + // This runs through all the breakpoint locations owning this site and returns + // the greatest of their suggested stack frame indexes. This only handles + // inlined stack changes. + std::optional GetSuggestedStackFrameIndex(); + /// Tell whether a breakpoint has a location at this site. /// /// \param[in] bp_id diff --git a/lldb/include/lldb/Core/Declaration.h b/lldb/include/lldb/Core/Declaration.h index 4a0e9047b54695e..c864b88c6b32a37 100644 --- a/lldb/include/lldb/Core/Declaration.h +++ b/lldb/include/lldb/Core/Declaration.h @@ -84,10 +84,14 @@ class Declaration { /// \param[in] declaration /// The const Declaration object to compare with. /// + /// \param[in] full + /// Same meaning as Full in FileSpec::Equal. True means an empty + /// directory is not equal to a specified one, false means it is equal. + /// /// \return /// Returns \b true if \b declaration is at the same file and /// line, \b false otherwise. - bool FileAndLineEqual(const Declaration &declaration) const; + bool FileAndLineEqual(const Declaration &declaration, bool full) const; /// Dump a description of this object to a Stream. 
/// diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h index fae90364deaf0a3..45beac129e86f7a 100644 --- a/lldb/include/lldb/Target/StopInfo.h +++ b/lldb/include/lldb/Target/StopInfo.h @@ -77,6 +77,18 @@ class StopInfo : public std::enable_shared_from_this { m_description.clear(); } + /// This gives the StopInfo a chance to suggest a stack frame to select. + /// Passing true for inlined_stack will request changes to the inlined + /// call stack. Passing false will request changes to the real stack + /// frame. The inlined stack gets adjusted before we call into the thread + /// plans so they can reason based on the correct values. The real stack + /// adjustment is handled after the frame recognizers get a chance to adjust + /// the frame. + virtual std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) { + return {}; + } + virtual bool IsValidForOperatingSystemThread(Thread &thread) { return true; } /// A Continue operation can result in a false stop event diff --git a/lldb/include/lldb/Target/ThreadPlanStepInRange.h b/lldb/include/lldb/Target/ThreadPlanStepInRange.h index f9ef87942a7c03d..9da8370ef1c9250 100644 --- a/lldb/include/lldb/Target/ThreadPlanStepInRange.h +++ b/lldb/include/lldb/Target/ThreadPlanStepInRange.h @@ -80,8 +80,8 @@ class ThreadPlanStepInRange : public ThreadPlanStepRange, bool m_step_past_prologue; // FIXME: For now hard-coded to true, we could put // a switch in for this if there's // demand for that. - bool m_virtual_step; // true if we've just done a "virtual step", i.e. just - // moved the inline stack depth. + LazyBool m_virtual_step; // true if we've just done a "virtual step", i.e. + // just moved the inline stack depth. 
ConstString m_step_into_target; ThreadPlanStepInRange(const ThreadPlanStepInRange &) = delete; const ThreadPlanStepInRange & diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index ad9057c8141e99b..c7ea50407ae1c77 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -508,8 +508,20 @@ void BreakpointLocation::GetDescription(Stream *s, s->PutCString("re-exported target = "); else s->PutCString("where = "); + + // If there's a preferred line entry for printing, use that. + bool show_function_info = true; + if (auto preferred = GetPreferredLineEntry()) { + sc.line_entry = *preferred; + // FIXME: We're going to get the function name wrong when the preferred + // line entry is not the lowest one. For now, just leave the function + // out in this case, but we really should also figure out how to easily + // fake the function name here. + show_function_info = false; + } sc.DumpStopContext(s, m_owner.GetTarget().GetProcessSP().get(), m_address, - false, true, false, true, true, true); + false, true, false, show_function_info, + show_function_info, show_function_info); } else { if (sc.module_sp) { s->EOL(); @@ -537,7 +549,10 @@ void BreakpointLocation::GetDescription(Stream *s, if (sc.line_entry.line > 0) { s->EOL(); s->Indent("location = "); - sc.line_entry.DumpStopContext(s, true); + if (auto preferred = GetPreferredLineEntry()) + preferred->DumpStopContext(s, true); + else + sc.line_entry.DumpStopContext(s, true); } } else { @@ -656,6 +671,50 @@ void BreakpointLocation::SendBreakpointLocationChangedEvent( } } +std::optional BreakpointLocation::GetSuggestedStackFrameIndex() { + auto preferred_opt = GetPreferredLineEntry(); + if (!preferred_opt) + return {}; + LineEntry preferred = *preferred_opt; + SymbolContext sc; + if (!m_address.CalculateSymbolContext(&sc)) + return {}; + // Don't return anything special if frame 0 is the preferred line entry. 
+ // We're not really telling the stack frame list to do anything special in + // that case. + if (!LineEntry::Compare(sc.line_entry, preferred)) + return {}; + + if (!sc.block) + return {}; + + // Blocks have their line info in Declaration form, so make one here: + Declaration preferred_decl(preferred.GetFile(), preferred.line, + preferred.column); + + uint32_t depth = 0; + Block *inlined_block = sc.block->GetContainingInlinedBlock(); + while (inlined_block) { + // If we've moved to a block that this isn't the start of, that's not + // our inlining info or call site, so we can stop here. + Address start_address; + if (!inlined_block->GetStartAddress(start_address) || + start_address != m_address) + return {}; + + const InlineFunctionInfo *info = inlined_block->GetInlinedFunctionInfo(); + if (info) { + if (preferred_decl == info->GetDeclaration()) + return depth; + if (preferred_decl == info->GetCallSite()) + return depth + 1; + } + inlined_block = inlined_block->GetInlinedParent(); + depth++; + } + return {}; +} + void BreakpointLocation::SwapLocation(BreakpointLocationSP swap_from) { m_address = swap_from->m_address; m_should_resolve_indirect_functions = diff --git a/lldb/source/Breakpoint/BreakpointResolver.cpp b/lldb/source/Breakpoint/BreakpointResolver.cpp index 8307689c7640cfe..9643602d78c751d 100644 --- a/lldb/source/Breakpoint/BreakpointResolver.cpp +++ b/lldb/source/Breakpoint/BreakpointResolver.cpp @@ -340,6 +340,21 @@ void BreakpointResolver::AddLocation(SearchFilter &filter, } BreakpointLocationSP bp_loc_sp(AddLocation(line_start)); + // If the address that we resolved the location to returns a different + // LineEntry from the one in the incoming SC, we're probably dealing with an + // inlined call site, so set that as the preferred LineEntry: + LineEntry resolved_entry; + if (!skipped_prologue && bp_loc_sp && + line_start.CalculateSymbolContextLineEntry(resolved_entry) && + LineEntry::Compare(resolved_entry, sc.line_entry)) { + // FIXME: The function
name will also be wrong here. Do we need to record + // that as well, or can we figure that out again when we report this + // breakpoint location. + if (!bp_loc_sp->SetPreferredLineEntry(sc.line_entry)) { + LLDB_LOG(log, "Tried to add a preferred line entry that didn't have the " + "same address as this location's address."); + } + } if (log && bp_loc_sp && !GetBreakpoint()->IsInternal()) { StreamString s; bp_loc_sp->GetDescription(&s, lldb::eDescriptionLevelVerbose); diff --git a/lldb/source/Breakpoint/BreakpointSite.cpp b/lldb/source/Breakpoint/BreakpointSite.cpp index 3ca93f908e30b8b..9700a57d3346e0b 100644 --- a/lldb/source/Breakpoint/BreakpointSite.cpp +++ b/lldb/source/Breakpoint/BreakpointSite.cpp @@ -87,6 +87,23 @@ void BreakpointSite::GetDescription(Stream *s, lldb::DescriptionLevel level) { m_constituents.GetDescription(s, level); } +std::optional BreakpointSite::GetSuggestedStackFrameIndex() { + + std::optional result; + std::lock_guard guard(m_constituents_mutex); + for (BreakpointLocationSP loc_sp : m_constituents.BreakpointLocations()) { + std::optional loc_frame_index = + loc_sp->GetSuggestedStackFrameIndex(); + if (loc_frame_index) { + if (result) + result = std::max(*loc_frame_index, *result); + else + result = loc_frame_index; + } + } + return result; +} + bool BreakpointSite::IsInternal() const { return m_constituents.IsInternal(); } uint8_t *BreakpointSite::GetTrapOpcodeBytes() { return &m_trap_opcode[0]; } diff --git a/lldb/source/Core/Declaration.cpp b/lldb/source/Core/Declaration.cpp index 579a3999d14ea09..a485c4b9ba48a7d 100644 --- a/lldb/source/Core/Declaration.cpp +++ b/lldb/source/Core/Declaration.cpp @@ -70,8 +70,9 @@ int Declaration::Compare(const Declaration &a, const Declaration &b) { return 0; } -bool Declaration::FileAndLineEqual(const Declaration &declaration) const { - int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, true); +bool Declaration::FileAndLineEqual(const Declaration &declaration, + bool full) 
const { + int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, full); return file_compare == 0 && this->m_line == declaration.m_line; } diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index f7d9c0d2d330656..5c7772a6db780d3 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -230,7 +230,7 @@ Block *Block::GetContainingInlinedBlockWithCallSite( const auto *function_info = inlined_block->GetInlinedFunctionInfo(); if (function_info && - function_info->GetCallSite().FileAndLineEqual(find_call_site)) + function_info->GetCallSite().FileAndLineEqual(find_call_site, true)) return inlined_block; inlined_block = inlined_block->GetInlinedParent(); } diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index db8f8ce6bcbc923..f0f7e40ae70d832 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -251,7 +251,10 @@ void CompileUnit::ResolveSymbolContext( SymbolContextItem resolve_scope, SymbolContextList &sc_list, RealpathPrefixes *realpath_prefixes) { const FileSpec file_spec = src_location_spec.GetFileSpec(); - const uint32_t line = src_location_spec.GetLine().value_or(0); + const uint32_t line = + src_location_spec.GetLine().value_or(LLDB_INVALID_LINE_NUMBER); + const uint32_t column_num = + src_location_spec.GetColumn().value_or(LLDB_INVALID_COLUMN_NUMBER); const bool check_inlines = src_location_spec.GetCheckInlines(); // First find all of the file indexes that match our "file_spec". If @@ -312,6 +315,112 @@ void CompileUnit::ResolveSymbolContext( 0, file_indexes, src_location_spec, &line_entry); } + // If we didn't manage to find a breakpoint that matched the line number + // requested, that might be because it is only an inline call site, and + // doesn't have a line entry in the line table. Scan for that here. 
+ // + // We are making the assumption that if there was an inlined function it will + // contribute at least 1 non-call-site entry to the line table. That's handy + // because we don't move line breakpoints over function boundaries, so if we + // found a hit, and there were also a call site entry, it would have to be in + // the function containing the PC of the line table match. That way we can + // limit the call site search to that function. + // We will miss functions that ONLY exist as a call site entry. + + if (line_entry.IsValid() && + (line_entry.line != line || line_entry.column != column_num) && + resolve_scope & eSymbolContextLineEntry && check_inlines) { + // We don't move lines over function boundaries, so the address in the + // line entry will be the in function that contained the line that might + // be a CallSite, and we can just iterate over that function to find any + // inline records, and dig up their call sites. + Address start_addr = line_entry.range.GetBaseAddress(); + Function *function = start_addr.CalculateSymbolContextFunction(); + + Declaration sought_decl(file_spec, line, column_num); + // We use this recursive function to descend the block structure looking + // for a block that has this Declaration as in it's CallSite info. + // This function recursively scans the sibling blocks of the incoming + // block parameter. + std::function examine_block = + [&sought_decl, &sc_list, &src_location_spec, resolve_scope, + &examine_block](Block &block) -> void { + // Iterate over the sibling child blocks of the incoming block. + Block *sibling_block = block.GetFirstChild(); + while (sibling_block) { + // We only have to descend through the regular blocks, looking for + // immediate inlines, since those are the only ones that will have this + // callsite. 
+ const InlineFunctionInfo *inline_info = + sibling_block->GetInlinedFunctionInfo(); + if (inline_info) { + // If this is the call-site we are looking for, record that: + // We need to be careful because the call site from the debug info + // will generally have a column, but the user might not have specified + // it. + Declaration found_decl = inline_info->GetCallSite(); + uint32_t sought_column = sought_decl.GetColumn(); + if (found_decl.FileAndLineEqual(sought_decl, false) && + (sought_column == LLDB_INVALID_COLUMN_NUMBER || + sought_column == found_decl.GetColumn())) { + // If we found a call site, it belongs not in this inlined block, + // but in the parent block that inlined it. + Address parent_start_addr; + if (sibling_block->GetParent()->GetStartAddress( + parent_start_addr)) { + SymbolContext sc; + parent_start_addr.CalculateSymbolContext(&sc, resolve_scope); + // Now swap out the line entry for the one we found. + LineEntry call_site_line = sc.line_entry; + call_site_line.line = found_decl.GetLine(); + call_site_line.column = found_decl.GetColumn(); + bool matches_spec = true; + // If the user asked for an exact match, we need to make sure the + // call site we found actually matches the location. 
+ if (src_location_spec.GetExactMatch()) { + matches_spec = false; + if ((src_location_spec.GetFileSpec() == + sc.line_entry.GetFile()) && + (src_location_spec.GetLine() && + *src_location_spec.GetLine() == call_site_line.line) && + (src_location_spec.GetColumn() && + *src_location_spec.GetColumn() == call_site_line.column)) + matches_spec = true; + } + if (matches_spec && + sibling_block->GetRangeAtIndex(0, call_site_line.range)) { + SymbolContext call_site_sc(sc.target_sp, sc.module_sp, + sc.comp_unit, sc.function, sc.block, + &call_site_line, sc.symbol); + sc_list.Append(call_site_sc); + } + } + } + } + + // Descend into the child blocks: + examine_block(*sibling_block); + // Now go to the next sibling: + sibling_block = sibling_block->GetSibling(); + } + }; + + if (function) { + // We don't need to examine the function block, it can't be inlined. + Block &func_block = function->GetBlock(true); + examine_block(func_block); + } + // If we found entries here, we are done. We only get here because we + // didn't find an exact line entry for this line & column, but if we found + // an exact match from the call site info that's strictly better than + // continuing to look for matches further on in the file. + // FIXME: Should I also do this for "call site line exists between the + // given line number and the later line we found in the line table"? That's + // a closer approximation to our general sliding algorithm. + if (sc_list.GetSize()) + return; + } + // If "exact == true", then "found_line" will be the same as "line". 
If // "exact == false", the "found_line" will be the closest line entry // with a line number greater than "line" and we will use this for our diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 3849ec5ed178d9a..94a381edd5e2027 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -85,121 +85,32 @@ void StackFrameList::ResetCurrentInlinedDepth() { return; std::lock_guard guard(m_mutex); - - GetFramesUpTo(0, DoNotAllowInterruption); - if (m_frames.empty()) - return; - if (!m_frames[0]->IsInlined()) { - m_current_inlined_depth = UINT32_MAX; - m_current_inlined_pc = LLDB_INVALID_ADDRESS; - Log *log = GetLog(LLDBLog::Step); - if (log && log->GetVerbose()) - LLDB_LOGF( - log, - "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); - return; - } - // We only need to do something special about inlined blocks when we are - // at the beginning of an inlined function: - // FIXME: We probably also have to do something special if the PC is at - // the END of an inlined function, which coincides with the end of either - // its containing function or another inlined function. - - Block *block_ptr = m_frames[0]->GetFrameBlock(); - if (!block_ptr) - return; + m_current_inlined_pc = LLDB_INVALID_ADDRESS; + m_current_inlined_depth = UINT32_MAX; - Address pc_as_address; - lldb::addr_t curr_pc = m_thread.GetRegisterContext()->GetPC(); - pc_as_address.SetLoadAddress(curr_pc, &(m_thread.GetProcess()->GetTarget())); - AddressRange containing_range; - if (!block_ptr->GetRangeContainingAddress(pc_as_address, containing_range) || - pc_as_address != containing_range.GetBaseAddress()) - return; - - // If we got here because of a breakpoint hit, then set the inlined depth - // depending on where the breakpoint was set. If we got here because of a - // crash, then set the inlined depth to the deepest most block. 
Otherwise, - // we stopped here naturally as the result of a step, so set ourselves in the - // containing frame of the whole set of nested inlines, so the user can then - // "virtually" step into the frames one by one, or next over the whole mess. - // Note: We don't have to handle being somewhere in the middle of the stack - // here, since ResetCurrentInlinedDepth doesn't get called if there is a - // valid inlined depth set. StopInfoSP stop_info_sp = m_thread.GetStopInfo(); if (!stop_info_sp) return; - switch (stop_info_sp->GetStopReason()) { - case eStopReasonWatchpoint: - case eStopReasonException: - case eStopReasonExec: - case eStopReasonFork: - case eStopReasonVFork: - case eStopReasonVForkDone: - case eStopReasonSignal: - // In all these cases we want to stop in the deepest frame. - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = 0; - break; - case eStopReasonBreakpoint: { - // FIXME: Figure out what this break point is doing, and set the inline - // depth appropriately. Be careful to take into account breakpoints that - // implement step over prologue, since that should do the default - // calculation. For now, if the breakpoints corresponding to this hit are - // all internal, I set the stop location to the top of the inlined stack, - // since that will make things like stepping over prologues work right. - // But if there are any non-internal breakpoints I do to the bottom of the - // stack, since that was the old behavior. 
- uint32_t bp_site_id = stop_info_sp->GetValue(); - BreakpointSiteSP bp_site_sp( - m_thread.GetProcess()->GetBreakpointSiteList().FindByID(bp_site_id)); - bool all_internal = true; - if (bp_site_sp) { - uint32_t num_owners = bp_site_sp->GetNumberOfConstituents(); - for (uint32_t i = 0; i < num_owners; i++) { - Breakpoint &bp_ref = - bp_site_sp->GetConstituentAtIndex(i)->GetBreakpoint(); - if (!bp_ref.IsInternal()) { - all_internal = false; - } - } - } - if (!all_internal) { - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = 0; - break; - } - } - [[fallthrough]]; - default: { - // Otherwise, we should set ourselves at the container of the inlining, so - // that the user can descend into them. So first we check whether we have - // more than one inlined block sharing this PC: - int num_inlined_functions = 0; - - for (Block *container_ptr = block_ptr->GetInlinedParent(); - container_ptr != nullptr; - container_ptr = container_ptr->GetInlinedParent()) { - if (!container_ptr->GetRangeContainingAddress(pc_as_address, - containing_range)) - break; - if (pc_as_address != containing_range.GetBaseAddress()) - break; - num_inlined_functions++; - } - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = num_inlined_functions + 1; - Log *log = GetLog(LLDBLog::Step); + bool inlined = true; + auto inline_depth = stop_info_sp->GetSuggestedStackFrameIndex(inlined); + // We're only adjusting the inlined stack here. 
+ Log *log = GetLog(LLDBLog::Step); + if (inline_depth) { + m_current_inlined_depth = *inline_depth; + m_current_inlined_pc = m_thread.GetRegisterContext()->GetPC(); + if (log && log->GetVerbose()) LLDB_LOGF(log, "ResetCurrentInlinedDepth: setting inlined " "depth: %d 0x%" PRIx64 ".\n", - m_current_inlined_depth, curr_pc); - - break; - } + m_current_inlined_depth, m_current_inlined_pc); + } else { + if (log && log->GetVerbose()) + LLDB_LOGF( + log, + "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); } } @@ -816,19 +727,48 @@ void StackFrameList::SelectMostRelevantFrame() { RecognizedStackFrameSP recognized_frame_sp = frame_sp->GetRecognizedFrame(); - if (!recognized_frame_sp) { - LLDB_LOG(log, "Frame #0 not recognized"); - return; + if (recognized_frame_sp) { + if (StackFrameSP most_relevant_frame_sp = + recognized_frame_sp->GetMostRelevantFrame()) { + LLDB_LOG(log, "Found most relevant frame at index {0}", + most_relevant_frame_sp->GetFrameIndex()); + SetSelectedFrame(most_relevant_frame_sp.get()); + return; + } } + LLDB_LOG(log, "Frame #0 not recognized"); - if (StackFrameSP most_relevant_frame_sp = - recognized_frame_sp->GetMostRelevantFrame()) { - LLDB_LOG(log, "Found most relevant frame at index {0}", - most_relevant_frame_sp->GetFrameIndex()); - SetSelectedFrame(most_relevant_frame_sp.get()); - } else { - LLDB_LOG(log, "No relevant frame!"); + // If this thread has a non-trivial StopInof, then let it suggest + // a most relevant frame: + StopInfoSP stop_info_sp = m_thread.GetStopInfo(); + uint32_t stack_idx = 0; + bool found_relevant = false; + if (stop_info_sp) { + // Here we're only asking the stop info if it wants to adjust the real stack + // index. We have to ask about the m_inlined_stack_depth in + // Thread::ShouldStop since the plans need to reason with that info. 
+ bool inlined = false; + std::optional stack_opt = + stop_info_sp->GetSuggestedStackFrameIndex(inlined); + if (stack_opt) { + stack_idx = *stack_opt; + found_relevant = true; + } } + + frame_sp = GetFrameAtIndex(stack_idx); + if (!frame_sp) + LLDB_LOG(log, "Stop info suggested relevant frame {0} but it didn't exist", + stack_idx); + else if (found_relevant) + LLDB_LOG(log, "Setting selected frame from stop info to {0}", stack_idx); + // Note, we don't have to worry about "inlined" frames here, because we've + // already calculated the inlined frame in Thread::ShouldStop, and + // SetSelectedFrame will take care of that adjustment for us. + SetSelectedFrame(frame_sp.get()); + + if (!found_relevant) + LLDB_LOG(log, "No relevant frame!"); } uint32_t StackFrameList::GetSelectedFrameIndex( @@ -841,6 +781,7 @@ uint32_t StackFrameList::GetSelectedFrameIndex( // isn't set, then don't force a selection here, just return 0. if (!select_most_relevant) return 0; + // If the inlined stack frame is set, then use that: m_selected_frame_idx = 0; } return *m_selected_frame_idx; diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index 60aa65ed38c7494..f6387d47504e626 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -15,6 +15,7 @@ #include "lldb/Breakpoint/WatchpointResource.h" #include "lldb/Core/Debugger.h" #include "lldb/Expression/UserExpression.h" +#include "lldb/Symbol/Block.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" #include "lldb/Target/Target.h" @@ -246,6 +247,22 @@ class StopInfoBreakpoint : public StopInfo { return m_description.c_str(); } + std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) override { + if (!inlined_stack) + return {}; + + ThreadSP thread_sp(m_thread_wp.lock()); + if (!thread_sp) + return {}; + BreakpointSiteSP bp_site_sp( + thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); + if (!bp_site_sp) + return {}; + + return 
bp_site_sp->GetSuggestedStackFrameIndex(); + } + protected: bool ShouldStop(Event *event_ptr) override { // This just reports the work done by PerformAction or the synchronous @@ -1164,6 +1181,44 @@ class StopInfoTrace : public StopInfo { else return m_description.c_str(); } + + std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) override { + // Trace only knows how to adjust inlined stacks: + if (!inlined_stack) + return {}; + + ThreadSP thread_sp = GetThread(); + StackFrameSP frame_0_sp = thread_sp->GetStackFrameAtIndex(0); + if (!frame_0_sp) + return {}; + if (!frame_0_sp->IsInlined()) + return {}; + Block *block_ptr = frame_0_sp->GetFrameBlock(); + if (!block_ptr) + return {}; + Address pc_address = frame_0_sp->GetFrameCodeAddress(); + AddressRange containing_range; + if (!block_ptr->GetRangeContainingAddress(pc_address, containing_range) || + pc_address != containing_range.GetBaseAddress()) + return {}; + + int num_inlined_functions = 0; + + for (Block *container_ptr = block_ptr->GetInlinedParent(); + container_ptr != nullptr; + container_ptr = container_ptr->GetInlinedParent()) { + if (!container_ptr->GetRangeContainingAddress(pc_address, + containing_range)) + break; + if (pc_address != containing_range.GetBaseAddress()) + break; + + num_inlined_functions++; + } + inlined_stack = true; + return num_inlined_functions + 1; + } }; // StopInfoException diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 8373cdc36268f8d..735295e6f25937a 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -619,6 +619,14 @@ void Thread::WillStop() { void Thread::SetupForResume() { if (GetResumeState() != eStateSuspended) { + // First check whether this thread is going to "actually" resume at all. + // For instance, if we're stepping from one level to the next of an + // virtual inlined call stack, we just change the inlined call stack index + // without actually running this thread. 
In that case, for this thread we + // shouldn't push a step over breakpoint plan or do that work. + if (GetCurrentPlan()->IsVirtualStep()) + return; + // If we're at a breakpoint push the step-over breakpoint plan. Do this // before telling the current plan it will resume, since we might change // what the current plan is. diff --git a/lldb/source/Target/ThreadPlanStepInRange.cpp b/lldb/source/Target/ThreadPlanStepInRange.cpp index 567dcc26d0d3727..325a70619908b6b 100644 --- a/lldb/source/Target/ThreadPlanStepInRange.cpp +++ b/lldb/source/Target/ThreadPlanStepInRange.cpp @@ -41,7 +41,7 @@ ThreadPlanStepInRange::ThreadPlanStepInRange( "Step Range stepping in", thread, range, addr_context, stop_others), ThreadPlanShouldStopHere(this), m_step_past_prologue(true), - m_virtual_step(false), m_step_into_target(step_into_target) { + m_virtual_step(eLazyBoolCalculate), m_step_into_target(step_into_target) { SetCallbacks(); SetFlagsToDefault(); SetupAvoidNoDebug(step_in_avoids_code_without_debug_info, @@ -149,7 +149,7 @@ bool ThreadPlanStepInRange::ShouldStop(Event *event_ptr) { m_sub_plan_sp.reset(); } - if (m_virtual_step) { + if (m_virtual_step == eLazyBoolYes) { // If we've just completed a virtual step, all we need to do is check for a // ShouldStopHere plan, and otherwise we're done. // FIXME - This can be both a step in and a step out. 
Probably should @@ -431,7 +431,7 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool return_value = false; - if (m_virtual_step) { + if (m_virtual_step == eLazyBoolYes) { return_value = true; } else { StopInfoSP stop_info_sp = GetPrivateStopInfo(); @@ -460,10 +460,13 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, bool current_plan) { - m_virtual_step = false; + m_virtual_step = eLazyBoolCalculate; if (resume_state == eStateStepping && current_plan) { Thread &thread = GetThread(); // See if we are about to step over a virtual inlined call. + // But if we already know we're virtual stepping, don't decrement the + // inlined depth again... + bool step_without_resume = thread.DecrementCurrentInlinedDepth(); if (step_without_resume) { Log *log = GetLog(LLDBLog::Step); @@ -476,11 +479,20 @@ bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, // FIXME: Maybe it would be better to create a InlineStep stop reason, but // then // the whole rest of the world would have to handle that stop reason. 
- m_virtual_step = true; + m_virtual_step = eLazyBoolYes; } return !step_without_resume; } return true; } -bool ThreadPlanStepInRange::IsVirtualStep() { return m_virtual_step; } +bool ThreadPlanStepInRange::IsVirtualStep() { + if (m_virtual_step == eLazyBoolCalculate) { + Thread &thread = GetThread(); + if (thread.GetCurrentInlinedDepth() == UINT32_MAX) + m_virtual_step = eLazyBoolNo; + else + m_virtual_step = eLazyBoolYes; + } + return m_virtual_step == eLazyBoolYes; +} diff --git a/lldb/source/Target/ThreadPlanStepOverRange.cpp b/lldb/source/Target/ThreadPlanStepOverRange.cpp index ef5b4b5c434d16e..643ee827c865cb8 100644 --- a/lldb/source/Target/ThreadPlanStepOverRange.cpp +++ b/lldb/source/Target/ThreadPlanStepOverRange.cpp @@ -402,7 +402,7 @@ bool ThreadPlanStepOverRange::DoWillResume(lldb::StateType resume_state, if (in_inlined_stack) { Log *log = GetLog(LLDBLog::Step); LLDB_LOGF(log, - "ThreadPlanStepInRange::DoWillResume: adjusting range to " + "ThreadPlanStepOverRange::DoWillResume: adjusting range to " "the frame at inlined depth %d.", thread.GetCurrentInlinedDepth()); StackFrameSP stack_sp = thread.GetStackFrameAtIndex(0); diff --git a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py index 752c3a9cbd286a8..f52e0f0fd5bcfe0 100644 --- a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py +++ b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py @@ -32,6 +32,12 @@ def test_step_in_template_with_python_api(self): self.build() self.step_in_template() + @add_test_categories(["pyapi"]) + def test_virtual_inline_stepping(self): + """Test stepping through a virtual inlined call stack""" + self.build() + self.virtual_inline_stepping() + def setUp(self): # Call super's setUp(). 
TestBase.setUp(self) @@ -357,3 +363,60 @@ def step_in_template(self): step_sequence = [["// In max_value specialized", "into"]] self.run_step_sequence(step_sequence) + + def run_to_call_site_and_step(self, source_regex, func_name, start_pos): + main_spec = lldb.SBFileSpec("calling.cpp") + # Set the breakpoint by file and line, not sourced regex because + # we want to make sure we can set breakpoints on call sites: + call_site_line_num = line_number(self.main_source, source_regex) + target, process, thread, bkpt = lldbutil.run_to_line_breakpoint( + self, main_spec, call_site_line_num + ) + + # Make sure that the location is at the call site (run_to_line_breakpoint already asserted + # that there's one location.): + bkpt_loc = bkpt.location[0] + strm = lldb.SBStream() + result = bkpt_loc.GetDescription(strm, lldb.eDescriptionLevelFull) + + self.assertTrue(result, "Got a location description") + desc = strm.GetData() + self.assertIn(f"calling.cpp:{call_site_line_num}", desc, "Right line listed") + # We don't get the function name right yet - so we omit it in printing. + # Turn on this test when that is working. + # self.assertIn(func_name, desc, "Right function listed") + + pc = thread.frame[0].pc + for i in range(start_pos, 3): + thread.StepInto() + frame_0 = thread.frame[0] + + trivial_line_num = line_number( + self.main_source, f"In caller_trivial_inline_{i}." + ) + self.assertEqual( + frame_0.line_entry.line, + trivial_line_num, + f"Stepped into the caller_trivial_inline_{i}", + ) + if pc != frame_0.pc: + # If we get here, we stepped to the expected line number, but + # the compiler on this system has decided to insert an instruction + # between the call site of an inlined function with no arguments, + # returning void, and its immediate call to another void inlined function + # with no arguments. We aren't going to be testing virtual inline + # stepping for this function... 
+ break + + process.Kill() + target.Clear() + + def virtual_inline_stepping(self): + """Use the Python API's to step through a virtual inlined stack""" + self.run_to_call_site_and_step("At caller_trivial_inline_1", "main", 1) + self.run_to_call_site_and_step( + "In caller_trivial_inline_1", "caller_trivial_inline_1", 2 + ) + self.run_to_call_site_and_step( + "In caller_trivial_inline_2", "caller_trivial_inline_2", 3 + ) diff --git a/lldb/test/API/functionalities/inline-stepping/calling.cpp b/lldb/test/API/functionalities/inline-stepping/calling.cpp index 49179ce7c97883c..d7ee56b3c079091 100644 --- a/lldb/test/API/functionalities/inline-stepping/calling.cpp +++ b/lldb/test/API/functionalities/inline-stepping/calling.cpp @@ -13,6 +13,12 @@ int called_by_inline_ref (int &value); inline void inline_trivial_1 () __attribute__((always_inline)); inline void inline_trivial_2 () __attribute__((always_inline)); +// These three should share the same initial pc so we can test +// virtual inline stepping. +inline void caller_trivial_inline_1() __attribute__((always_inline)); +inline void caller_trivial_inline_2() __attribute__((always_inline)); +inline void caller_trivial_inline_3() __attribute__((always_inline)); + void caller_trivial_1 (); void caller_trivial_2 (); @@ -79,6 +85,23 @@ caller_trivial_2 () inline_value += 1; // At increment in caller_trivial_2. } +// When you call caller_trivial_inline_1, the inlined call-site +// should share a PC with all three of the following inlined +// functions, so we can exercise "virtual inline stepping". +void caller_trivial_inline_1() { + caller_trivial_inline_2(); // In caller_trivial_inline_1. + inline_value += 1; +} + +void caller_trivial_inline_2() { + caller_trivial_inline_3(); // In caller_trivial_inline_2. + inline_value += 1; +} + +void caller_trivial_inline_3() { + inline_value += 1; // In caller_trivial_inline_3. 
+} + void called_by_inline_trivial () { @@ -132,5 +155,7 @@ main (int argc, char **argv) max_value(123, 456); // Call max_value template max_value(std::string("abc"), std::string("0022")); // Call max_value specialized + caller_trivial_inline_1(); // At caller_trivial_inline_1. + return 0; // About to return from main. } From e517cfc531886bf6ed64b4e7109bb3141ac7f430 Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Mon, 28 Oct 2024 10:13:45 -0700 Subject: [PATCH 195/425] [InstrPGO] Support cold function coverage instrumentation (#109837) This patch adds support for cold function coverage instrumentation based on sampling PGO counts. The major motivation is to detect dead functions for the services that are optimized with sampling PGO. If a function is covered by sampling profile count (e.g., those with an entry count > 0), we choose to skip instrumenting those functions, which significantly reduces the instrumentation overhead. More details about the implementation and flags: - Added a flag `--pgo-instrument-cold-function-only` in `PGOInstrumentation.cpp` as the main switch to control skipping the instrumentation. - Built the extra instrumentation passes(a bundle of passes in `addPGOInstrPasses`) under sampling PGO pipeline. This is controlled by `--instrument-cold-function-only-path` flag. - Added a driver flag `-fprofile-generate-cold-function-coverage`: - 1) Config the flags in one place, i,e. adding `--instrument-cold-function-only-path=<...>` and `--pgo-function-entry-coverage`. Note that the instrumentation file path is passed through `--instrument-sample-cold-function-path`, because we cannot use the `PGOOptions.ProfileFile` as it's already used by `-fprofile-sample-use=<...>`. - 2) makes linker to link `compiler_rt.profile` lib(see [ToolChain.cpp#L1125-L1131](https://github.com/llvm/llvm-project/blob/main/clang/lib/Driver/ToolChain.cpp#L1125-L1131) ). - Added a flag(`--pgo-cold-instrument-entry-threshold`) to config entry count to determine cold function. 
Overall, the full command is like: ``` clang++ -O2 -fprofile-generate-cold-function-coverage=<...> -fprofile-sample-use=<...> code.cc -o code ``` --- clang/include/clang/Driver/Options.td | 6 ++++ clang/lib/Driver/ToolChain.cpp | 4 ++- clang/lib/Driver/ToolChains/Clang.cpp | 20 +++++++++++ .../test/CodeGen/pgo-cold-function-coverage.c | 19 ++++++++++ ...fprofile-generate-cold-function-coverage.c | 8 +++++ llvm/lib/Passes/PassBuilderPipelines.cpp | 17 ++++++++- .../Instrumentation/PGOInstrumentation.cpp | 19 ++++++++++ .../PGOProfile/instr-gen-cold-function.ll | 35 +++++++++++++++++++ 8 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/pgo-cold-function-coverage.c create mode 100644 clang/test/Driver/fprofile-generate-cold-function-coverage.c create mode 100644 llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 69ae0553507bd3a..1ddf488b8bf4c7a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1786,6 +1786,12 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling", PosFlag, NegFlag>; +def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">, + Group, Visibility<[ClangOption, CLOption]>, + HelpText<"Generate instrumented code to collect coverage info for cold functions into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">; +def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">, + Group, Visibility<[ClangOption, CLOption]>, MetaVarName<"">, + HelpText<"Generate instrumented code to collect coverage info for cold functions into /default.profraw (overridden by LLVM_PROFILE_FILE env var)">; def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">, Group, Visibility<[ClangOption, CLOption]>, HelpText<"Generate 
instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 4df317709508587..34de0043ca012aa 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -897,7 +897,9 @@ bool ToolChain::needsProfileRT(const ArgList &Args) { Args.hasArg(options::OPT_fprofile_instr_generate) || Args.hasArg(options::OPT_fprofile_instr_generate_EQ) || Args.hasArg(options::OPT_fcreate_profile) || - Args.hasArg(options::OPT_forder_file_instrumentation); + Args.hasArg(options::OPT_forder_file_instrumentation) || + Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage) || + Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage_EQ); } bool ToolChain::needsGCovInstrumentation(const llvm::opt::ArgList &Args) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 04b3832327a99c4..4c6f508f1f24a62 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -632,6 +632,26 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, } } + if (auto *ColdFuncCoverageArg = Args.getLastArg( + options::OPT_fprofile_generate_cold_function_coverage, + options::OPT_fprofile_generate_cold_function_coverage_EQ)) { + SmallString<128> Path( + ColdFuncCoverageArg->getOption().matches( + options::OPT_fprofile_generate_cold_function_coverage_EQ) + ? ColdFuncCoverageArg->getValue() + : ""); + llvm::sys::path::append(Path, "default_%m.profraw"); + // FIXME: Idealy the file path should be passed through + // `-fprofile-instrument-path=`(InstrProfileOutput), however, this field is + // shared with other profile use path(see PGOOptions), we need to refactor + // PGOOptions to make it work. 
+ CmdArgs.push_back("-mllvm"); + CmdArgs.push_back(Args.MakeArgString( + Twine("--instrument-cold-function-only-path=") + Path)); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("--pgo-function-entry-coverage"); + } + Arg *PGOGenArg = nullptr; if (PGOGenerateArg) { assert(!CSPGOGenerateArg); diff --git a/clang/test/CodeGen/pgo-cold-function-coverage.c b/clang/test/CodeGen/pgo-cold-function-coverage.c new file mode 100644 index 000000000000000..fd1e1e7e14cda56 --- /dev/null +++ b/clang/test/CodeGen/pgo-cold-function-coverage.c @@ -0,0 +1,19 @@ +// Test -fprofile-generate-cold-function-coverage + +// RUN: rm -rf %t && split-file %s %t +// RUN: %clang -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%t/pgo-cold-func.prof -S -emit-llvm -o - %t/pgo-cold-func.c | FileCheck %s + +// CHECK: @__llvm_profile_filename = {{.*}} c"/xxx/yyy/default_%m.profraw\00" + +// CHECK: store i8 0, ptr @__profc_bar, align 1 +// CHECK-NOT: @__profc_foo + +//--- pgo-cold-func.prof +foo:1:1 + 1: 1 + +//--- pgo-cold-func.c +int bar(int x) { return x;} +int foo(int x) { + return x; +} diff --git a/clang/test/Driver/fprofile-generate-cold-function-coverage.c b/clang/test/Driver/fprofile-generate-cold-function-coverage.c new file mode 100644 index 000000000000000..9b2f46423f34b1d --- /dev/null +++ b/clang/test/Driver/fprofile-generate-cold-function-coverage.c @@ -0,0 +1,8 @@ +// RUN: %clang -### -c -fprofile-generate-cold-function-coverage %s 2>&1 | FileCheck %s +// CHECK: "--instrument-cold-function-only-path=default_%m.profraw" +// CHECK: "--pgo-function-entry-coverage" +// CHECK-NOT: "-fprofile-instrument" +// CHECK-NOT: "-fprofile-instrument-path= + +// RUN: %clang -### -c -fprofile-generate-cold-function-coverage=dir %s 2>&1 | FileCheck %s --check-prefix=CHECK-EQ +// CHECK-EQ: "--instrument-cold-function-only-path=dir{{/|\\\\}}default_%m.profraw" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp 
b/llvm/lib/Passes/PassBuilderPipelines.cpp index 17710eb94b6dedb..488554c84c1c437 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -296,7 +296,12 @@ static cl::opt UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); +static cl::opt InstrumentColdFuncOnlyPath( + "instrument-cold-function-only-path", cl::init(""), + cl::desc("File path for cold function only instrumentation"), cl::Hidden); + extern cl::opt UseCtxProfile; +extern cl::opt PGOInstrumentColdFunctionOnly; namespace llvm { extern cl::opt EnableMemProfContextDisambiguation; @@ -1182,8 +1187,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, const bool IsCtxProfUse = !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink; + // Enable cold function coverage instrumentation if + // InstrumentColdFuncOnlyPath is provided. + const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly = + IsPGOPreLink && !InstrumentColdFuncOnlyPath.empty(); + if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen || - IsCtxProfUse) + IsCtxProfUse || IsColdFuncOnlyInstrGen) addPreInlinerPasses(MPM, Level, Phase); // Add all the requested passes for instrumentation PGO, if requested. 
@@ -1205,6 +1215,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, return MPM; addPostPGOLoopRotation(MPM, Level); MPM.addPass(PGOCtxProfLoweringPass()); + } else if (IsColdFuncOnlyInstrGen) { + addPGOInstrPasses( + MPM, Level, /* RunProfileGen */ true, /* IsCS */ false, + /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath, + /* ProfileRemappingFile */ "", IntrusiveRefCntPtr()); } if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen) diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index bceb6135cc1f926..4d8141431a0c191 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -319,6 +319,20 @@ static cl::opt PGOFunctionCriticalEdgeThreshold( cl::desc("Do not instrument functions with the number of critical edges " " greater than this threshold.")); +static cl::opt PGOColdInstrumentEntryThreshold( + "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden, + cl::desc("For cold function instrumentation, skip instrumenting functions " + "whose entry count is above the given value.")); + +static cl::opt PGOTreatUnknownAsCold( + "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden, + cl::desc("For cold function instrumentation, treat count unknown(e.g. 
" + "unprofiled) functions as cold.")); + +cl::opt PGOInstrumentColdFunctionOnly( + "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden, + cl::desc("Enable cold function only instrumentation.")); + extern cl::opt MaxNumVTableAnnotations; namespace llvm { @@ -1897,6 +1911,11 @@ static bool skipPGOGen(const Function &F) { return true; if (F.getInstructionCount() < PGOFunctionSizeThreshold) return true; + if (PGOInstrumentColdFunctionOnly) { + if (auto EntryCount = F.getEntryCount()) + return EntryCount->getCount() > PGOColdInstrumentEntryThreshold; + return !PGOTreatUnknownAsCold; + } return false; } diff --git a/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll b/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll new file mode 100644 index 000000000000000..ab8cf8c010812b3 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -S | FileCheck --check-prefixes=COLD %s +; RUN: opt < %s --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -pgo-cold-instrument-entry-threshold=1 -S | FileCheck --check-prefixes=ENTRY-COUNT %s +; RUN: opt < %s --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -pgo-treat-unknown-as-cold -S | FileCheck --check-prefixes=UNKNOWN-FUNC %s + +; COLD: call void @llvm.instrprof.cover(ptr @__profn_foo, i64 [[#]], i32 1, i32 0) +; COLD-NOT: __profn_main +; COLD-NOT: __profn_bar + +; ENTRY-COUNT: call void @llvm.instrprof.cover(ptr @__profn_foo, i64 [[#]], i32 1, i32 0) +; ENTRY-COUNT: call void @llvm.instrprof.cover(ptr @__profn_main, i64 [[#]], i32 1, i32 0) + +; UNKNOWN-FUNC: call void @llvm.instrprof.cover(ptr @__profn_bar, i64 [[#]], i32 1, i32 0) +; UNKNOWN-FUNC: call void @llvm.instrprof.cover(ptr @__profn_foo, i64 [[#]], i32 1, i32 0) + + +target datalayout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @bar() { +entry: + ret void +} + +define void @foo() !prof !0 { +entry: + ret void +} + +define i32 @main() !prof !1 { +entry: + ret i32 0 +} + +!0 = !{!"function_entry_count", i64 0} +!1 = !{!"function_entry_count", i64 1} From 98e3075df992636fa42aafde96748d1d5c834688 Mon Sep 17 00:00:00 2001 From: Steven Perron Date: Mon, 28 Oct 2024 13:25:04 -0400 Subject: [PATCH 196/425] [HLSL][SPIRV] Add convergence tokens to entry point wrapper (#112757) Inlining currently assumes that either all function use controled convergence or none of them do. This is why we need to have the entry point wrapper use controled convergence. https://github.com/llvm/llvm-project/blob/c85611e8583e6392d56075ebdfa60893b6284813/llvm/lib/Transforms/Utils/InlineFunction.cpp#L2431-L2439 --- clang/lib/CodeGen/CGHLSLRuntime.cpp | 41 +++++++++++++++++-- clang/lib/CodeGen/CGHLSLRuntime.h | 1 + .../CodeGenHLSL/convergence/entry.point.hlsl | 11 +++++ 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 clang/test/CodeGenHLSL/convergence/entry.point.hlsl diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 2cce2936fe5aeef..06558ce796f2e49 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -404,6 +404,16 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, BasicBlock *BB = BasicBlock::Create(Ctx, "entry", EntryFn); IRBuilder<> B(BB); llvm::SmallVector Args; + + SmallVector OB; + if (CGM.shouldEmitConvergenceTokens()) { + assert(EntryFn->isConvergent()); + llvm::Value *I = B.CreateIntrinsic( + llvm::Intrinsic::experimental_convergence_entry, {}, {}); + llvm::Value *bundleArgs[] = {I}; + OB.emplace_back("convergencectrl", bundleArgs); + } + // FIXME: support struct parameters where semantics are on members. 
// See: https://github.com/llvm/llvm-project/issues/57874 unsigned SRetOffset = 0; @@ -419,7 +429,7 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, Args.push_back(emitInputSemantic(B, *PD, Param.getType())); } - CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args); + CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args, OB); CI->setCallingConv(Fn->getCallingConv()); // FIXME: Handle codegen for return type semantics. // See: https://github.com/llvm/llvm-project/issues/57875 @@ -474,14 +484,22 @@ void CGHLSLRuntime::generateGlobalCtorDtorCalls() { for (auto &F : M.functions()) { if (!F.hasFnAttribute("hlsl.shader")) continue; - IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin()); + auto *Token = getConvergenceToken(F.getEntryBlock()); + Instruction *IP = &*F.getEntryBlock().begin(); + SmallVector OB; + if (Token) { + llvm::Value *bundleArgs[] = {Token}; + OB.emplace_back("convergencectrl", bundleArgs); + IP = Token->getNextNode(); + } + IRBuilder<> B(IP); for (auto *Fn : CtorFns) - B.CreateCall(FunctionCallee(Fn)); + B.CreateCall(FunctionCallee(Fn), {}, OB); // Insert global dtors before the terminator of the last instruction B.SetInsertPoint(F.back().getTerminator()); for (auto *Fn : DtorFns) - B.CreateCall(FunctionCallee(Fn)); + B.CreateCall(FunctionCallee(Fn), {}, OB); } // No need to keep global ctors/dtors for non-lib profile after call to @@ -579,3 +597,18 @@ llvm::Function *CGHLSLRuntime::createResourceBindingInitFn() { Builder.CreateRetVoid(); return InitResBindingsFunc; } + +llvm::Instruction *CGHLSLRuntime::getConvergenceToken(BasicBlock &BB) { + if (!CGM.shouldEmitConvergenceTokens()) + return nullptr; + + auto E = BB.end(); + for (auto I = BB.begin(); I != E; ++I) { + auto *II = dyn_cast(&*I); + if (II && llvm::isConvergenceControlIntrinsic(II->getIntrinsicID())) { + return II; + } + } + llvm_unreachable("Convergence token should have been emitted."); + return nullptr; +} diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h 
b/clang/lib/CodeGen/CGHLSLRuntime.h index ff7df41b5c62e71..cd533cad84e9fbe 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -143,6 +143,7 @@ class CGHLSLRuntime { bool needsResourceBindingInitFn(); llvm::Function *createResourceBindingInitFn(); + llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB); private: void addBufferResourceAnnotation(llvm::GlobalVariable *GV, diff --git a/clang/test/CodeGenHLSL/convergence/entry.point.hlsl b/clang/test/CodeGenHLSL/convergence/entry.point.hlsl new file mode 100644 index 000000000000000..337a9ad5026c161 --- /dev/null +++ b/clang/test/CodeGenHLSL/convergence/entry.point.hlsl @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: define void @main() +// CHECK-NEXT: entry: +// CHECK-NEXT: [[token:%[0-9]+]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: call spir_func void @_Z4mainv() [ "convergencectrl"(token [[token]]) ] + +[numthreads(1,1,1)] +void main() { +} + From 97fb21ac1d6bc528b61a555356457ff2129dfde1 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Mon, 28 Oct 2024 10:33:29 -0700 Subject: [PATCH 197/425] [rtsan] Intercept aligned_alloc on all versions of OSX if available on build machine (#112780) --- .../tests/rtsan_test_interceptors_posix.cpp | 17 +++++++++----- .../sanitizer_platform_interceptors.h | 22 ++++++++++++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 6233c3e91800e10..38274485c29f66a 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -122,13 +122,20 @@ TEST(TestRtsanInterceptors, VallocDiesWhenRealtime) { ExpectNonRealtimeSurvival(Func); } -#if SANITIZER_INTERCEPT_ALIGNED_ALLOC +#if 
__has_builtin(__builtin_available) && SANITIZER_APPLE +#define ALIGNED_ALLOC_AVAILABLE() (__builtin_available(macOS 10.15, *)) +#else +// We are going to assume this is true until we hit systems where it isn't +#define ALIGNED_ALLOC_AVAILABLE() (true) +#endif + TEST(TestRtsanInterceptors, AlignedAllocDiesWhenRealtime) { - auto Func = []() { EXPECT_NE(nullptr, aligned_alloc(16, 32)); }; - ExpectRealtimeDeath(Func, "aligned_alloc"); - ExpectNonRealtimeSurvival(Func); + if (ALIGNED_ALLOC_AVAILABLE()) { + auto Func = []() { EXPECT_NE(nullptr, aligned_alloc(16, 32)); }; + ExpectRealtimeDeath(Func, "aligned_alloc"); + ExpectNonRealtimeSurvival(Func); + } } -#endif // free_sized and free_aligned_sized (both C23) are not yet supported TEST(TestRtsanInterceptors, FreeDiesWhenRealtime) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 6959a6d52d604e0..3fd6b595ef197f8 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -84,6 +84,25 @@ #define SI_NOT_MAC 1 #endif +#if SANITIZER_APPLE +# include + +// aligned_alloc was introduced in OSX 10.15 +// Linking will fail when using an older SDK +# if defined(__MAC_10_15) +// macOS 10.15 is greater than our minimal deployment target. To ensure we +// generate a weak reference so the dylib continues to work on older +// systems, we need to forward declare the intercepted function as "weak +// imports". 
+SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment, + __sanitizer::usize __size); +# define SI_MAC_SDK_10_15_AVAILABLE 1 +# else +# define SI_MAC_SDK_10_15_AVAILABLE 0 +# endif // defined(__MAC_10_15) + +#endif // SANITIZER_APPLE + #if SANITIZER_IOS #define SI_IOS 1 #else @@ -500,7 +519,8 @@ #define SANITIZER_INTERCEPT_PVALLOC (SI_GLIBC || SI_ANDROID) #define SANITIZER_INTERCEPT_CFREE (SI_GLIBC && !SANITIZER_RISCV64) #define SANITIZER_INTERCEPT_REALLOCARRAY SI_POSIX -#define SANITIZER_INTERCEPT_ALIGNED_ALLOC (!SI_MAC) +#define SANITIZER_INTERCEPT_ALIGNED_ALLOC \ + (!SI_MAC || SI_MAC_SDK_10_15_AVAILABLE) #define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC && !SI_NETBSD) #define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_WCSLEN 1 From 31a6dbe941b25aadd6cbf3829d1a96973968bf11 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Mon, 28 Oct 2024 18:44:23 +0100 Subject: [PATCH 198/425] [LLD][COFF] Add Support for ARM64EC pseudo relocations (#113832) --- lld/COFF/Chunks.cpp | 16 +++---- lld/test/COFF/autoimport-arm64ec-data.test | 56 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 lld/test/COFF/autoimport-arm64ec-data.test diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index c6986681dffe77a..33fb20ffeaf3212 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -570,8 +570,7 @@ void SectionChunk::getBaserels(std::vector *res) { // another DLL) This returns the size the relocation is supposed to update, // in bits, or 0 if the relocation cannot be handled as a runtime pseudo // relocation. -static int getRuntimePseudoRelocSize(uint16_t type, - llvm::COFF::MachineTypes machine) { +static int getRuntimePseudoRelocSize(uint16_t type, Triple::ArchType arch) { // Relocations that either contain an absolute address, or a plain // relative offset, since the runtime pseudo reloc implementation // adds 8/16/32/64 bit values to a memory address. 
@@ -597,8 +596,8 @@ static int getRuntimePseudoRelocSize(uint16_t type, // the image, or temporarily changed at runtime with VirtualProtect. // Since this only operates on direct address values, it doesn't work for // ARM/ARM64 relocations, other than the plain ADDR32/ADDR64 relocations. - switch (machine) { - case AMD64: + switch (arch) { + case Triple::x86_64: switch (type) { case IMAGE_REL_AMD64_ADDR64: return 64; @@ -613,7 +612,7 @@ static int getRuntimePseudoRelocSize(uint16_t type, default: return 0; } - case I386: + case Triple::x86: switch (type) { case IMAGE_REL_I386_DIR32: case IMAGE_REL_I386_REL32: @@ -621,14 +620,14 @@ static int getRuntimePseudoRelocSize(uint16_t type, default: return 0; } - case ARMNT: + case Triple::thumb: switch (type) { case IMAGE_REL_ARM_ADDR32: return 32; default: return 0; } - case ARM64: + case Triple::aarch64: switch (type) { case IMAGE_REL_ARM64_ADDR64: return 64; @@ -661,8 +660,7 @@ void SectionChunk::getRuntimePseudoRelocs( // alive. Thus such dangling references in DWARF sections are expected. 
if (!target->getChunk()) continue; - int sizeInBits = - getRuntimePseudoRelocSize(rel.Type, file->ctx.config.machine); + int sizeInBits = getRuntimePseudoRelocSize(rel.Type, getArch()); if (sizeInBits == 0) { error("unable to automatically import from " + target->getName() + " with relocation type " + diff --git a/lld/test/COFF/autoimport-arm64ec-data.test b/lld/test/COFF/autoimport-arm64ec-data.test new file mode 100644 index 000000000000000..4d71b55f651a654 --- /dev/null +++ b/lld/test/COFF/autoimport-arm64ec-data.test @@ -0,0 +1,56 @@ +# REQUIRES: aarch64, x86 +RUN: split-file %s %t.dir && cd %t.dir + +RUN: llvm-lib -machine:arm64ec -out:libtest.a -def:test.def +RUN: llvm-mc -triple=arm64ec-windows-gnu arm64ec.s -filetype=obj -o arm64ec.obj +RUN: llvm-mc -triple=arm64ec-windows-gnu x86_64.s -filetype=obj -o x86_64.obj + +RUN: lld-link -machine:arm64ec -out:out.dll -dll -noentry x86_64.obj arm64ec.obj libtest.a -lldmingw + +RUN: llvm-readobj --coff-imports out.dll | FileCheck -check-prefix=IMPORTS %s +RUN: llvm-objdump -s out.dll | FileCheck --check-prefix=CONTENTS %s + +IMPORTS: Import { +IMPORTS-NEXT: Name: test.dll +IMPORTS-NEXT: ImportLookupTableRVA: 0x40E0 +IMPORTS-NEXT: ImportAddressTableRVA: 0x3000 +IMPORTS-NEXT: Symbol: variable (0) +IMPORTS-NEXT: } + +Runtime pseudo relocation list header at 0x401c, consisting of 0x0, 0x0, 0x1. +The first runtime pseudo relocation is from an x86_64 object file, with import +from 0x3000, applied at 0x7000 with a size of 32 bits. The second pseudo +relocation is from an ARM64EC object file, with import from 0x3000, applied +at 0x7008 with a size of 32 bits. 
+ +CONTENTS: Contents of section .rdata: +CONTENTS: 180004010 00200000 10200000 00200000 00000000 +CONTENTS: 180004020 00000000 01000000 00300000 00700000 +CONTENTS: 180004030 40000000 00300000 08700000 40000000 + +CONTENTS: Contents of section .test: +CONTENTS-NEXT: 180007000 00300080 01000000 00300080 01000000 +CONTENTS-NEXT: 180007010 1c400080 01000000 40400080 01000000 + +#--- arm64ec.s + .text + .global "#_pei386_runtime_relocator" +"#_pei386_runtime_relocator": + ret + + .weak_anti_dep _pei386_runtime_relocator +.set _pei386_runtime_relocator,"#_pei386_runtime_relocator" + + .section .test,"dr" + .quad variable + .quad __RUNTIME_PSEUDO_RELOC_LIST__ + .quad __RUNTIME_PSEUDO_RELOC_LIST_END__ + +#--- x86_64.s + .section .test,"dr" + .quad variable + +#--- test.def +LIBRARY test.dll +EXPORTS + variable DATA From 4cf128512be5d7e41d8b8b5a12eec47a64af36ea Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Mon, 28 Oct 2024 13:55:57 -0400 Subject: [PATCH 199/425] [NFC][AMDGPU] Use C++17 structured bindings as much as possible (#113939) This only changes `llvm/lib/Target/AMDGPU/SIISelLowering.cpp`. There are five uses of `std::tie` remaining because they can't be replaced with C++17 structured bindings. 
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 72 +++++++---------------- 1 file changed, 22 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 803eb86e08986cf..52ca38aca5c7711 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1948,13 +1948,9 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG, const DataLayout &DL = DAG.getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); const SIMachineFunctionInfo *Info = MF.getInfo(); - - const ArgDescriptor *InputPtrReg; - const TargetRegisterClass *RC; - LLT ArgTy; MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); - std::tie(InputPtrReg, RC, ArgTy) = + auto [InputPtrReg, RC, ArgTy] = Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR); // We may not have the kernarg segment argument if we have no kernel @@ -3335,25 +3331,18 @@ void SITargetLowering::passSpecialInputs( // clang-format on for (auto Attr : ImplicitAttrs) { - const ArgDescriptor *OutgoingArg; - const TargetRegisterClass *ArgRC; - LLT ArgTy; - AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first; // If the callee does not use the attribute value, skip copying the value. if (CLI.CB->hasFnAttr(Attr.second)) continue; - std::tie(OutgoingArg, ArgRC, ArgTy) = + const auto [OutgoingArg, ArgRC, ArgTy] = CalleeArgInfo->getPreloadedValue(InputID); if (!OutgoingArg) continue; - const ArgDescriptor *IncomingArg; - const TargetRegisterClass *IncomingArgRC; - LLT Ty; - std::tie(IncomingArg, IncomingArgRC, Ty) = + const auto [IncomingArg, IncomingArgRC, Ty] = CallerArgInfo.getPreloadedValue(InputID); assert(IncomingArgRC == ArgRC); @@ -3396,11 +3385,8 @@ void SITargetLowering::passSpecialInputs( // Pack workitem IDs into a single register or pass it as is if already // packed. 
- const ArgDescriptor *OutgoingArg; - const TargetRegisterClass *ArgRC; - LLT Ty; - std::tie(OutgoingArg, ArgRC, Ty) = + auto [OutgoingArg, ArgRC, Ty] = CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X); if (!OutgoingArg) std::tie(OutgoingArg, ArgRC, Ty) = @@ -4460,15 +4446,13 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI, MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - MachineBasicBlock *LoopBB; - MachineBasicBlock *RemainderBB; const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); // Apparently kill flags are only valid if the def is in the same block? if (MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0)) Src->setIsKill(false); - std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true); + auto [LoopBB, RemainderBB] = splitBlockForLoop(MI, *BB, true); MachineBasicBlock::iterator I = LoopBB->end(); @@ -4628,9 +4612,7 @@ loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI, .addReg(Exec); // clang-format on - MachineBasicBlock *LoopBB; - MachineBasicBlock *RemainderBB; - std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, MBB, false); + auto [LoopBB, RemainderBB] = splitBlockForLoop(MI, MBB, false); const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx); @@ -5755,8 +5737,7 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op, VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 || VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16); - SDValue Lo, Hi; - std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); + auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0); SDLoc SL(Op); SDValue OpLo = DAG.getNode(Opc, SL, Lo.getValueType(), Lo, Op->getFlags()); @@ -5776,10 +5757,8 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op, VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 || VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16); - SDValue Lo0, Hi0; - std::tie(Lo0, Hi0) = 
DAG.SplitVectorOperand(Op.getNode(), 0); - SDValue Lo1, Hi1; - std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1); + auto [Lo0, Hi0] = DAG.SplitVectorOperand(Op.getNode(), 0); + auto [Lo1, Hi1] = DAG.SplitVectorOperand(Op.getNode(), 1); SDLoc SL(Op); @@ -5802,15 +5781,13 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op, VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v16bf16 || VT == MVT::v32bf16); - SDValue Lo0, Hi0; SDValue Op0 = Op.getOperand(0); - std::tie(Lo0, Hi0) = Op0.getValueType().isVector() - ? DAG.SplitVectorOperand(Op.getNode(), 0) - : std::pair(Op0, Op0); - SDValue Lo1, Hi1; - std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1); - SDValue Lo2, Hi2; - std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2); + auto [Lo0, Hi0] = Op0.getValueType().isVector() + ? DAG.SplitVectorOperand(Op.getNode(), 0) + : std::pair(Op0, Op0); + + auto [Lo1, Hi1] = DAG.SplitVectorOperand(Op.getNode(), 1); + auto [Lo2, Hi2] = DAG.SplitVectorOperand(Op.getNode(), 2); SDLoc SL(Op); auto ResVT = DAG.GetSplitDestVTs(VT); @@ -7427,8 +7404,7 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, if (VecSize == 128 || VecSize == 256 || VecSize == 512) { SDValue Lo, Hi; - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT); if (VecSize == 128) { SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec); @@ -10459,9 +10435,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // size. 
switch (Subtarget->getMaxPrivateElementSize()) { case 4: { - SDValue Ops[2]; - std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG); - return DAG.getMergeValues(Ops, DL); + auto [Op0, Op1] = scalarizeVectorLoad(Load, DAG); + return DAG.getMergeValues({Op0, Op1}, DL); } case 8: if (NumElements > 2) @@ -10493,9 +10468,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), MemVT, *Load->getMemOperand())) { - SDValue Ops[2]; - std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG); - return DAG.getMergeValues(Ops, DL); + auto [Op0, Op1] = expandUnalignedLoad(Load, DAG); + return DAG.getMergeValues({Op0, Op1}, DL); } return SDValue(); @@ -12534,8 +12508,7 @@ SDValue SITargetLowering::performOrCombine(SDNode *N, EVT SrcVT = ExtSrc.getValueType(); if (SrcVT == MVT::i32) { SDLoc SL(N); - SDValue LowLHS, HiBits; - std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG); + auto [LowLHS, HiBits] = split64BitValue(LHS, DAG); SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc); DCI.AddToWorklist(LowOr.getNode()); @@ -13870,8 +13843,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N, getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo); if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) { - SDValue AccumLo, AccumHi; - std::tie(AccumLo, AccumHi) = DAG.SplitScalar(Accum, SL, MVT::i32, MVT::i32); + auto [AccumLo, AccumHi] = DAG.SplitScalar(Accum, SL, MVT::i32, MVT::i32); if (!MulLHSUnsigned32) { auto MulLHSHi = From f23bdbbaff5b89b1c102a155d062fc32f99d4a92 Mon Sep 17 00:00:00 2001 From: tf2spi Date: Mon, 28 Oct 2024 13:59:53 -0400 Subject: [PATCH 200/425] Add DILabel functions for LLVM-C (#112840) Addresses #112799 --- llvm/include/llvm-c/DebugInfo.h | 46 +++++++++++++++++++ llvm/lib/IR/DebugInfo.cpp | 41 +++++++++++++++++ .../Bindings/llvm-c/debug_info_new_format.ll | 22 +++++---- llvm/tools/llvm-c-test/debuginfo.c | 21 
++++++++- 4 files changed, 120 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h index 6d8891e70577222..f7d81636f4dd4ea 100644 --- a/llvm/include/llvm-c/DebugInfo.h +++ b/llvm/include/llvm-c/DebugInfo.h @@ -1415,6 +1415,52 @@ LLVMMetadataRef LLVMInstructionGetDebugLoc(LLVMValueRef Inst); */ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc); +/** + * Create a new descriptor for a label + * + * \param Builder The DIBuilder. + * \param Scope The scope to create the label in. + * \param Name Variable name. + * \param NameLen Length of variable name. + * \param File The file to create the label in. + * \param LineNo Line Number. + * \param AlwaysPreserve Preserve the label regardless of optimization. + * + * @see llvm::DIBuilder::createLabel() + */ +LLVMMetadataRef LLVMDIBuilderCreateLabel( + LLVMDIBuilderRef Builder, + LLVMMetadataRef Context, const char *Name, size_t NameLen, + LLVMMetadataRef File, unsigned LineNo, LLVMBool AlwaysPreserve); + +/** + * Insert a new llvm.dbg.label intrinsic call + * + * \param Builder The DIBuilder. + * \param LabelInfo The Label's debug info descriptor + * \param Location The debug info location + * \param InsertBefore Location for the new intrinsic. + * + * @see llvm::DIBuilder::insertLabel() + */ +LLVMDbgRecordRef LLVMDIBuilderInsertLabelBefore( + LLVMDIBuilderRef Builder, LLVMMetadataRef LabelInfo, + LLVMMetadataRef Location, LLVMValueRef InsertBefore); + +/** + * Insert a new llvm.dbg.label intrinsic call + * + * \param Builder The DIBuilder. + * \param LabelInfo The Label's debug info descriptor + * \param Location The debug info location + * \param InsertAtEnd Location for the new intrinsic. 
+ * + * @see llvm::DIBuilder::insertLabel() + */ +LLVMDbgRecordRef LLVMDIBuilderInsertLabelAtEnd( + LLVMDIBuilderRef Builder, LLVMMetadataRef LabelInfo, + LLVMMetadataRef Location, LLVMBasicBlockRef InsertAtEnd); + /** * Obtain the enumerated type of a Metadata instance. * diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 50b29ae4f41676a..e20a0f053481ed3 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -1799,6 +1799,47 @@ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc) { unwrap(Inst)->setDebugLoc(DebugLoc()); } +LLVMMetadataRef LLVMDIBuilderCreateLabel( + LLVMDIBuilderRef Builder, + LLVMMetadataRef Context, const char *Name, size_t NameLen, + LLVMMetadataRef File, unsigned LineNo, LLVMBool AlwaysPreserve) { + return wrap(unwrap(Builder)->createLabel( + unwrapDI(Context), StringRef(Name, NameLen), + unwrapDI(File), LineNo, AlwaysPreserve)); +} + +LLVMDbgRecordRef LLVMDIBuilderInsertLabelBefore( + LLVMDIBuilderRef Builder, LLVMMetadataRef LabelInfo, + LLVMMetadataRef Location, LLVMValueRef InsertBefore) { + DbgInstPtr DbgInst = unwrap(Builder)->insertLabel( + unwrapDI(LabelInfo), unwrapDI(Location), + unwrap(InsertBefore)); + // This assert will fail if the module is in the old debug info format. + // This function should only be called if the module is in the new + // debug info format. + // See https://llvm.org/docs/RemoveDIsDebugInfo.html#c-api-changes, + // LLVMIsNewDbgInfoFormat, and LLVMSetIsNewDbgInfoFormat for more info. + assert(isa(DbgInst) && + "Function unexpectedly in old debug info format"); + return wrap(cast(DbgInst)); +} + +LLVMDbgRecordRef LLVMDIBuilderInsertLabelAtEnd( + LLVMDIBuilderRef Builder, LLVMMetadataRef LabelInfo, + LLVMMetadataRef Location, LLVMBasicBlockRef InsertAtEnd) { + DbgInstPtr DbgInst = unwrap(Builder)->insertLabel( + unwrapDI(LabelInfo), unwrapDI(Location), + unwrap(InsertAtEnd)); + // This assert will fail if the module is in the old debug info format. 
+ // This function should only be called if the module is in the new + // debug info format. + // See https://llvm.org/docs/RemoveDIsDebugInfo.html#c-api-changes, + // LLVMIsNewDbgInfoFormat, and LLVMSetIsNewDbgInfoFormat for more info. + assert(isa(DbgInst) && + "Function unexpectedly in old debug info format"); + return wrap(cast(DbgInst)); +} + LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata) { switch(unwrap(Metadata)->getMetadataID()) { #define HANDLE_METADATA_LEAF(CLASS) \ diff --git a/llvm/test/Bindings/llvm-c/debug_info_new_format.ll b/llvm/test/Bindings/llvm-c/debug_info_new_format.ll index e7f537aa4f1a9aa..a3f4bb2421fa260 100644 --- a/llvm/test/Bindings/llvm-c/debug_info_new_format.ll +++ b/llvm/test/Bindings/llvm-c/debug_info_new_format.ll @@ -6,15 +6,18 @@ ; CHECK: define i64 @foo(i64 %0, i64 %1, <10 x i64> %2) !dbg !31 { ; CHECK-NEXT: entry: -; CHECK-NEXT: #dbg_declare(i64 0, !38, !DIExpression(), !44) -; CHECK-NEXT: #dbg_declare(i64 0, !39, !DIExpression(), !44) -; CHECK-NEXT: #dbg_declare(i64 0, !40, !DIExpression(), !44) +; CHECK-NEXT: #dbg_declare(i64 0, !38, !DIExpression(), !45) +; CHECK-NEXT: #dbg_declare(i64 0, !39, !DIExpression(), !45) +; CHECK-NEXT: #dbg_declare(i64 0, !40, !DIExpression(), !45) +; CHECK-NEXT: #dbg_label(!46, !45) +; CHECK-NEXT: br label %vars +; CHECK-NEXT: #dbg_label(!47, !45) ; CHECK-NEXT: br label %vars ; CHECK: vars: ; CHECK-NEXT: %p1 = phi i64 [ 0, %entry ] ; CHECK-NEXT: %p2 = phi i64 [ 0, %entry ] -; CHECK-NEXT: #dbg_value(i64 0, !41, !DIExpression(DW_OP_constu, 0, DW_OP_stack_value), !45) -; CHECK-NEXT: #dbg_value(i64 1, !43, !DIExpression(DW_OP_constu, 1, DW_OP_stack_value), !45) +; CHECK-NEXT: #dbg_value(i64 0, !41, !DIExpression(DW_OP_constu, 0, DW_OP_stack_value), !48) +; CHECK-NEXT: #dbg_value(i64 1, !43, !DIExpression(DW_OP_constu, 1, DW_OP_stack_value), !48) ; CHECK-NEXT: %a = add i64 %p1, %p2 ; CHECK-NEXT: ret i64 0 ; CHECK-NEXT: } @@ -60,12 +63,15 @@ ; CHECK-NEXT: !34 = !DICompositeType(tag: 
DW_TAG_array_type, baseType: !6, size: 640, flags: DIFlagVector, elements: !35) ; CHECK-NEXT: !35 = !{!36} ; CHECK-NEXT: !36 = !DISubrange(count: 10, lowerBound: 0) -; CHECK-NEXT: !37 = !{!38, !39, !40, !41, !43} +; CHECK-NEXT: !37 = !{!38, !39, !40, !41, !43, !44} ; CHECK-NEXT: !38 = !DILocalVariable(name: "a", arg: 1, scope: !31, file: !1, line: 42, type: !6) ; CHECK-NEXT: !39 = !DILocalVariable(name: "b", arg: 2, scope: !31, file: !1, line: 42, type: !6) ; CHECK-NEXT: !40 = !DILocalVariable(name: "c", arg: 3, scope: !31, file: !1, line: 42, type: !34) ; CHECK-NEXT: !41 = !DILocalVariable(name: "d", scope: !42, file: !1, line: 43, type: !6) ; CHECK-NEXT: !42 = distinct !DILexicalBlock(scope: !31, file: !1, line: 42) ; CHECK-NEXT: !43 = !DILocalVariable(name: "e", scope: !42, file: !1, line: 44, type: !6) -; CHECK-NEXT: !44 = !DILocation(line: 42, scope: !31) -; CHECK-NEXT: !45 = !DILocation(line: 43, scope: !31) +; CHECK-NEXT: !44 = !DILabel(scope: !31, name: "label3", file: !1, line: 42) +; CHECK-NEXT: !45 = !DILocation(line: 42, scope: !31) +; CHECK-NEXT: !46 = !DILabel(scope: !31, name: "label1", file: !1, line: 42) +; CHECK-NEXT: !47 = !DILabel(scope: !31, name: "label2", file: !1, line: 42) +; CHECK-NEXT: !48 = !DILocation(line: 43, scope: !31) diff --git a/llvm/tools/llvm-c-test/debuginfo.c b/llvm/tools/llvm-c-test/debuginfo.c index 942cbe5e5900e41..baf4ddfcc9a37bf 100644 --- a/llvm/tools/llvm-c-test/debuginfo.c +++ b/llvm/tools/llvm-c-test/debuginfo.c @@ -163,6 +163,11 @@ int llvm_test_dibuilder(void) { LLVMSetSubprogram(FooFunction, FunctionMetadata); + LLVMMetadataRef FooLabel1 = LLVMDIBuilderCreateLabel(DIB, FunctionMetadata, + "label1", 6, File, 42, false); + LLVMDIBuilderInsertLabelAtEnd(DIB, FooLabel1, FooParamLocation, + FooEntryBlock); + LLVMMetadataRef FooLexicalBlock = LLVMDIBuilderCreateLexicalBlock(DIB, FunctionMetadata, File, 42, 0); @@ -210,8 +215,6 @@ int llvm_test_dibuilder(void) { LLVMAddNamedMetadataOperand( M, "EnumTest", 
LLVMMetadataAsValue(LLVMGetModuleContext(M), EnumTest)); - LLVMDIBuilderFinalize(DIB); - // Using the new debug format, debug records get attached to instructions. // Insert a `br` and `ret` now to absorb the debug records which are // currently "trailing", meaning that they're associated with a block @@ -221,6 +224,20 @@ int llvm_test_dibuilder(void) { LLVMPositionBuilderAtEnd(Builder, FooEntryBlock); // Build `br label %vars` in entry. LLVMBuildBr(Builder, FooVarBlock); + + // Build another br for the sake of testing labels. + LLVMMetadataRef FooLabel2 = LLVMDIBuilderCreateLabel(DIB, FunctionMetadata, + "label2", 6, File, 42, false); + LLVMDIBuilderInsertLabelBefore(DIB, FooLabel2, FooParamLocation, + LLVMBuildBr(Builder, FooVarBlock)); + // label3 will be emitted, but label4 won't be emitted + // because label3 is AlwaysPreserve and label4 is not. + LLVMDIBuilderCreateLabel(DIB, FunctionMetadata, + "label3", 6, File, 42, true); + LLVMDIBuilderCreateLabel(DIB, FunctionMetadata, + "label4", 6, File, 42, false); + LLVMDIBuilderFinalize(DIB); + // Build `ret i64 0` in vars. LLVMPositionBuilderAtEnd(Builder, FooVarBlock); LLVMTypeRef I64 = LLVMInt64TypeInContext(Ctx); From 19c0a74ad6baa9eb38dbe0a20af7c67999c41821 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Mon, 28 Oct 2024 11:13:00 -0700 Subject: [PATCH 201/425] [lldb] Fix lldb windows build breakage from https://github.com/llvm/llvm-project/pull/113839. 
--- lldb/tools/lldb-dap/JSONUtils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index fd09e4ae505e6d2..97fe6b4f9f05db7 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -21,6 +21,7 @@ #include "lldb/API/SBStringList.h" #include "lldb/API/SBStructuredData.h" #include "lldb/API/SBValue.h" +#include "lldb/Host/PosixApi.h" #include "DAP.h" #include "ExceptionBreakpoint.h" From da1a16ae10177494c7cae929bec987e90a160403 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Mon, 28 Oct 2024 11:24:27 -0700 Subject: [PATCH 202/425] [clang][modules] Preserve the module map that allowed inferring (#113389) With inferred modules, the dependency scanner takes care to replace the fake "__inferred_module.map" path with the file that allowed the module to be inferred. However, this only worked when such a module was imported directly in the TU. Whenever such a module got loaded transitively, the scanner would fail to perform the replacement. This is caused by the fact that PCM files are lossy and drop this information. This patch makes sure that PCMs include this file for each submodule (in the `SUBMODULE_DEFINITION` record), fixes one existing test with an incorrect assertion, and does a little drive-by refactoring of `ModuleMap`. 
--- .../include/clang/Serialization/ASTBitCodes.h | 4 ++-- clang/include/clang/Serialization/ASTReader.h | 2 ++ clang/lib/Frontend/FrontendAction.cpp | 1 - clang/lib/Lex/ModuleMap.cpp | 11 ++++------- clang/lib/Serialization/ASTReader.cpp | 5 +++-- clang/lib/Serialization/ASTWriter.cpp | 8 ++++++++ .../DependencyScanning/ModuleDepCollector.cpp | 18 +++++++----------- clang/test/ClangScanDeps/link-libraries.c | 7 +++---- 8 files changed, 29 insertions(+), 27 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 3ddbc5fcd26c44f..b6193866fc71346 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -44,7 +44,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 31; +const unsigned VERSION_MAJOR = 32; /// AST file minor version number supported by this version of /// Clang. @@ -54,7 +54,7 @@ const unsigned VERSION_MAJOR = 31; /// for the previous version could still support reading the new /// version by ignoring new kinds of subblocks), this number /// should be increased. -const unsigned VERSION_MINOR = 1; +const unsigned VERSION_MINOR = 0; /// An ID number that refers to an identifier in an AST file. /// diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index b476a40ebd2c8c3..070c1c9a54f48c6 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2335,6 +2335,8 @@ class ASTReader /// Translate a FileID from another module file's FileID space into ours. 
FileID TranslateFileID(ModuleFile &F, FileID FID) const { assert(FID.ID >= 0 && "Reading non-local FileID."); + if (FID.isInvalid()) + return FID; return FileID::get(F.SLocEntryBaseID + FID.ID - 1); } diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 8264bd702fe43fb..9a50e7453eb61a5 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -534,7 +534,6 @@ static Module *prepareToBuildModule(CompilerInstance &CI, } if (*OriginalModuleMap != CI.getSourceManager().getFileEntryRefForID( CI.getSourceManager().getMainFileID())) { - M->IsInferred = true; auto FileCharacter = M->IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap; FileID OriginalModuleMapFID = CI.getSourceManager().getOrCreateFileID( diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index bc76a54abd95adf..201ab91cf68ca16 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -657,8 +657,7 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { llvm::sys::path::stem(SkippedDir.getName()), NameBuf); Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit).first; - InferredModuleAllowedBy[Result] = UmbrellaModuleMap; - Result->IsInferred = true; + setInferredModuleAllowedBy(Result, UmbrellaModuleMap); // Associate the module and the directory. 
UmbrellaDirs[SkippedDir] = Result; @@ -675,8 +674,7 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { llvm::sys::path::stem(File.getName()), NameBuf); Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit).first; - InferredModuleAllowedBy[Result] = UmbrellaModuleMap; - Result->IsInferred = true; + setInferredModuleAllowedBy(Result, UmbrellaModuleMap); Result->addTopHeader(File); // If inferred submodules export everything they import, add a @@ -1097,8 +1095,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir, Module *Result = new (ModulesAlloc.Allocate()) Module(ModuleConstructorTag{}, ModuleName, SourceLocation(), Parent, /*IsFramework=*/true, /*IsExplicit=*/false, NumCreatedModules++); - InferredModuleAllowedBy[Result] = ModuleMapFID; - Result->IsInferred = true; + setInferredModuleAllowedBy(Result, ModuleMapFID); if (!Parent) { if (LangOpts.CurrentModule == ModuleName) SourceModule = Result; @@ -1346,7 +1343,7 @@ ModuleMap::getModuleMapFileForUniquing(const Module *M) const { } void ModuleMap::setInferredModuleAllowedBy(Module *M, FileID ModMapFID) { - assert(M->IsInferred && "module not inferred"); + M->IsInferred = true; InferredModuleAllowedBy[M] = ModMapFID; } diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 7d9170e7f0b4797..2419ed84e68acf8 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5813,6 +5813,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]); Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++]; SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]); + FileID InferredAllowedBy = ReadFileID(F, Record, Idx); bool IsFramework = Record[Idx++]; bool IsExplicit = Record[Idx++]; bool IsSystem = Record[Idx++]; @@ -5834,8 +5835,6 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, 
ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit) .first; - // FIXME: Call ModMap.setInferredModuleAllowedBy() - SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; if (GlobalIndex >= SubmodulesLoaded.size() || SubmodulesLoaded[GlobalIndex]) @@ -5866,6 +5865,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, CurrentModule->DefinitionLoc = DefinitionLoc; CurrentModule->Signature = F.Signature; CurrentModule->IsFromModuleFile = true; + if (InferredAllowedBy.isValid()) + ModMap.setInferredModuleAllowedBy(CurrentModule, InferredAllowedBy); CurrentModule->IsSystem = IsSystem || CurrentModule->IsSystem; CurrentModule->IsExternC = IsExternC; CurrentModule->InferSubmodules = InferSubmodules; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c09a41f4d1403cf..569c688f793d81a 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2914,6 +2914,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Kind Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Definition location + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Inferred allowed by Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExplicit Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystem @@ -3018,6 +3019,12 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { SourceLocationEncoding::RawLocEncoding DefinitionLoc = getRawSourceLocationEncoding(getAdjustedLocation(Mod->DefinitionLoc)); + ModuleMap &ModMap = PP->getHeaderSearchInfo().getModuleMap(); + FileID UnadjustedInferredFID; + if (Mod->IsInferred) + UnadjustedInferredFID = ModMap.getModuleMapFileIDForUniquing(Mod); + int InferredFID = 
getAdjustedFileID(UnadjustedInferredFID).getOpaqueValue(); + // Emit the definition of the block. { RecordData::value_type Record[] = {SUBMODULE_DEFINITION, @@ -3025,6 +3032,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { ParentID, (RecordData::value_type)Mod->Kind, DefinitionLoc, + (RecordData::value_type)InferredFID, Mod->IsFramework, Mod->IsExplicit, Mod->IsSystem, diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index 77f9d07175c2c15..637416cd1fc621f 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -587,9 +587,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { ModuleMap &ModMapInfo = MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap(); - OptionalFileEntryRef ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M); - - if (ModuleMap) { + if (auto ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M)) { SmallString<128> Path = ModuleMap->getNameAsRequested(); ModMapInfo.canonicalizeModuleMapPath(Path); MD.ClangModuleMapFile = std::string(Path); @@ -601,15 +599,13 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) { MDC.ScanInstance.getASTReader()->visitInputFileInfos( *MF, /*IncludeSystem=*/true, [&](const serialization::InputFileInfo &IFI, bool IsSystem) { - // __inferred_module.map is the result of the way in which an implicit - // module build handles inferred modules. It adds an overlay VFS with - // this file in the proper directory and relies on the rest of Clang to - // handle it like normal. With explicitly built modules we don't need - // to play VFS tricks, so replace it with the correct module map. - if (StringRef(IFI.Filename).ends_with("__inferred_module.map")) { - MDC.addFileDep(MD, ModuleMap->getName()); + // The __inferred_module.map file is an insignificant implementation + // detail of implicitly-built modules. 
The PCM will also report the + // actual on-disk module map file that allowed inferring the module, + // which is what we need for building the module explicitly + // Let's ignore this file. + if (StringRef(IFI.Filename).ends_with("__inferred_module.map")) return; - } MDC.addFileDep(MD, IFI.Filename); }); diff --git a/clang/test/ClangScanDeps/link-libraries.c b/clang/test/ClangScanDeps/link-libraries.c index c09691d2356efcc..bc0b0c546ea032d 100644 --- a/clang/test/ClangScanDeps/link-libraries.c +++ b/clang/test/ClangScanDeps/link-libraries.c @@ -39,14 +39,13 @@ module transitive { // CHECK-NEXT: "modules": [ // CHECK-NEXT: { // CHECK-NEXT: "clang-module-deps": [], -// CHECK-NEXT: "clang-modulemap-file": "{{.*}}/__inferred_module.map", +// CHECK-NEXT: "clang-modulemap-file": "[[PREFIX]]/Inputs/frameworks/module.modulemap", // CHECK-NEXT: "command-line": [ // CHECK: ], // CHECK-NEXT: "context-hash": "{{.*}}", // CHECK-NEXT: "file-deps": [ -// CHECK-NEXT: "{{.*}}/Framework.h" -// CHECK-NEXT: "{{.*}}/__inferred_module.map" -// CHECK-NEXT: "{{.*}}/module.modulemap" +// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/Framework.framework/Headers/Framework.h" +// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/module.modulemap" // CHECK-NEXT: ], // CHECK-NEXT: "link-libraries": [ // CHECK-NEXT: { From cafd3e10c39a2bfc81eac33ee56a706476e676a9 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Mon, 28 Oct 2024 11:30:30 -0700 Subject: [PATCH 203/425] [BOLT][test] Fix NFC check with pre-aggregated-perf.test (#113944) NFC checks have been failing starting with https://lab.llvm.org/buildbot/#/builders/92/builds/8567. NFC testing wrapper (llvm-bolt-wrapper) replaces the call of `perf2bolt` with `llvm-bolt --aggregate-only --ignore-build-id`. `show-density` is automatically enabled for perf2bolt only but not for `llvm-bolt --aggregate-only`. Add the flag to the test to work around the issue. 
Test Plan: ``` cd build ../llvm-project/bolt/utils/nfc-check-setup.py --switch-back --verbose bin/llvm-lit -a tools/bolt/test/X86/pre-aggregated-perf.test ``` --- bolt/test/X86/pre-aggregated-perf.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test index 3242ba22f591642..cf745ca7bf7b62a 100644 --- a/bolt/test/X86/pre-aggregated-perf.test +++ b/bolt/test/X86/pre-aggregated-perf.test @@ -11,6 +11,7 @@ REQUIRES: system-linux RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \ +RUN: --show-density \ RUN: --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \ RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B @@ -18,6 +19,7 @@ CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts. RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \ +RUN: --show-density \ RUN: --profile-density-cutoff-hot=970000 \ RUN: --profile-use-dfs 2>&1 | FileCheck %s --check-prefix=CHECK-WARNING From e0a02fdb459f3126fbc40cf376f4a3871652ae49 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 28 Oct 2024 11:43:28 -0700 Subject: [PATCH 204/425] [NFC][Clang][TableGen] Use StringRef in FlattenedSpelling (#113809) - Change FlattenedSpelling to use StringRef instead of std::string. - Use range for loops and enumerate(). - Use ArrayRef<> instead of std::vector reference as function arguments. - Use {} for all if/else branch bodies if one of them uses it. 
--- clang/utils/TableGen/ClangAttrEmitter.cpp | 245 ++++++++++------------ 1 file changed, 109 insertions(+), 136 deletions(-) diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index 4890d249c6d8f78..cf9c70a93e5db22 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -49,40 +49,38 @@ using namespace llvm; namespace { class FlattenedSpelling { - std::string V, N, NS; + StringRef V, N, NS; bool K = false; const Record &OriginalSpelling; public: - FlattenedSpelling(const std::string &Variety, const std::string &Name, - const std::string &Namespace, bool KnownToGCC, - const Record &OriginalSpelling) + FlattenedSpelling(StringRef Variety, StringRef Name, StringRef Namespace, + bool KnownToGCC, const Record &OriginalSpelling) : V(Variety), N(Name), NS(Namespace), K(KnownToGCC), OriginalSpelling(OriginalSpelling) {} explicit FlattenedSpelling(const Record &Spelling) - : V(std::string(Spelling.getValueAsString("Variety"))), - N(std::string(Spelling.getValueAsString("Name"))), - OriginalSpelling(Spelling) { + : V(Spelling.getValueAsString("Variety")), + N(Spelling.getValueAsString("Name")), OriginalSpelling(Spelling) { assert(V != "GCC" && V != "Clang" && "Given a GCC spelling, which means this hasn't been flattened!"); if (V == "CXX11" || V == "C23" || V == "Pragma") - NS = std::string(Spelling.getValueAsString("Namespace")); + NS = Spelling.getValueAsString("Namespace"); } - const std::string &variety() const { return V; } - const std::string &name() const { return N; } - const std::string &nameSpace() const { return NS; } + StringRef variety() const { return V; } + StringRef name() const { return N; } + StringRef nameSpace() const { return NS; } bool knownToGCC() const { return K; } const Record &getSpellingRecord() const { return OriginalSpelling; } }; struct FlattenedSpellingInfo { - FlattenedSpellingInfo(std::string Syntax, std::string Scope, - std::string 
TargetTest, uint32_t ArgMask) + FlattenedSpellingInfo(StringRef Syntax, StringRef Scope, + const std::string &TargetTest, uint32_t ArgMask) : Syntax(Syntax), Scope(Scope), TargetTest(TargetTest), ArgMask(ArgMask) { } - std::string Syntax; - std::string Scope; + StringRef Syntax; + StringRef Scope; std::string TargetTest; uint32_t ArgMask; }; @@ -105,17 +103,18 @@ GetFlattenedSpellings(const Record &Attr) { StringRef Variety = Spelling->getValueAsString("Variety"); StringRef Name = Spelling->getValueAsString("Name"); if (Variety == "GCC") { - Ret.emplace_back("GNU", std::string(Name), "", true, *Spelling); - Ret.emplace_back("CXX11", std::string(Name), "gnu", true, *Spelling); + Ret.emplace_back("GNU", Name, "", true, *Spelling); + Ret.emplace_back("CXX11", Name, "gnu", true, *Spelling); if (Spelling->getValueAsBit("AllowInC")) - Ret.emplace_back("C23", std::string(Name), "gnu", true, *Spelling); + Ret.emplace_back("C23", Name, "gnu", true, *Spelling); } else if (Variety == "Clang") { - Ret.emplace_back("GNU", std::string(Name), "", false, *Spelling); - Ret.emplace_back("CXX11", std::string(Name), "clang", false, *Spelling); + Ret.emplace_back("GNU", Name, "", false, *Spelling); + Ret.emplace_back("CXX11", Name, "clang", false, *Spelling); if (Spelling->getValueAsBit("AllowInC")) - Ret.emplace_back("C23", std::string(Name), "clang", false, *Spelling); - } else + Ret.emplace_back("C23", Name, "clang", false, *Spelling); + } else { Ret.push_back(FlattenedSpelling(*Spelling)); + } } return Ret; @@ -1554,7 +1553,7 @@ static void writeAvailabilityValue(raw_ostream &OS) { << " OS << \""; } -static void writeDeprecatedAttrValue(raw_ostream &OS, std::string &Variety) { +static void writeDeprecatedAttrValue(raw_ostream &OS, StringRef Variety) { OS << "\\\"\" << getMessage() << \"\\\"\";\n"; // Only GNU deprecated has an optional fixit argument at the second position. 
if (Variety == "GNU") @@ -1577,9 +1576,12 @@ static void writeGetSpellingFunction(const Record &R, raw_ostream &OS) { " llvm_unreachable(\"Unknown attribute spelling!\");\n" " return \"(No spelling)\";\n"; - for (unsigned I = 0; I < Spellings.size(); ++I) - OS << " case " << I << ":\n" - " return \"" << Spellings[I].name() << "\";\n"; + for (const auto &[Idx, S] : enumerate(Spellings)) { + // clang-format off + OS << " case " << Idx << ":\n" + " return \"" << S.name() << "\";\n"; + // clang-format on + } // End of the switch statement. OS << " }\n"; // End of the getSpelling function. @@ -1607,14 +1609,14 @@ writePrettyPrintFunction(const Record &R, << " llvm_unreachable(\"Unknown attribute spelling!\");\n" << " break;\n"; - for (unsigned I = 0; I < Spellings.size(); ++ I) { + for (const auto &[Idx, S] : enumerate(Spellings)) { SmallString<16> Prefix; SmallString<8> Suffix; // The actual spelling of the name and namespace (if applicable) // of an attribute without considering prefix and suffix. 
SmallString<64> Spelling; - std::string Name = Spellings[I].name(); - std::string Variety = Spellings[I].variety(); + StringRef Name = S.name(); + StringRef Variety = S.variety(); if (Variety == "GNU") { Prefix = "__attribute__(("; @@ -1622,7 +1624,7 @@ writePrettyPrintFunction(const Record &R, } else if (Variety == "CXX11" || Variety == "C23") { Prefix = "[["; Suffix = "]]"; - std::string Namespace = Spellings[I].nameSpace(); + StringRef Namespace = S.nameSpace(); if (!Namespace.empty()) { Spelling += Namespace; Spelling += "::"; @@ -1639,7 +1641,7 @@ writePrettyPrintFunction(const Record &R, } else if (Variety == "Pragma") { Prefix = "#pragma "; Suffix = "\n"; - std::string Namespace = Spellings[I].nameSpace(); + StringRef Namespace = S.nameSpace(); if (!Namespace.empty()) { Spelling += Namespace; Spelling += " "; @@ -1653,7 +1655,7 @@ writePrettyPrintFunction(const Record &R, Spelling += Name; - OS << " case " << I << " : {\n" + OS << " case " << Idx << " : {\n" << " OS << \"" << Prefix << Spelling << "\";\n"; if (Variety == "Pragma") { @@ -1724,24 +1726,17 @@ writePrettyPrintFunction(const Record &R, } /// Return the index of a spelling in a spelling list. 
-static unsigned -getSpellingListIndex(const std::vector &SpellingList, - const FlattenedSpelling &Spelling) { +static unsigned getSpellingListIndex(ArrayRef SpellingList, + const FlattenedSpelling &Spelling) { assert(!SpellingList.empty() && "Spelling list is empty!"); - for (unsigned Index = 0; Index < SpellingList.size(); ++Index) { - const FlattenedSpelling &S = SpellingList[Index]; - if (S.variety() != Spelling.variety()) - continue; - if (S.nameSpace() != Spelling.nameSpace()) - continue; - if (S.name() != Spelling.name()) - continue; - - return Index; + for (const auto &[Index, S] : enumerate(SpellingList)) { + if (S.variety() == Spelling.variety() && + S.nameSpace() == Spelling.nameSpace() && S.name() == Spelling.name()) + return Index; } - llvm_unreachable("Unknown spelling!"); + PrintFatalError("Unknown spelling: " + Spelling.name()); } static void writeAttrAccessorDefinition(const Record &R, raw_ostream &OS) { @@ -1802,15 +1797,15 @@ CreateSemanticSpellings(const std::vector &Spellings, "AttributeCommonInfo"); for (auto I = Spellings.begin(), E = Spellings.end(); I != E; ++I, ++Idx) { const FlattenedSpelling &S = *I; - const std::string &Variety = S.variety(); - const std::string &Spelling = S.name(); - const std::string &Namespace = S.nameSpace(); + StringRef Variety = S.variety(); + StringRef Spelling = S.name(); + StringRef Namespace = S.nameSpace(); std::string EnumName; - EnumName += (Variety + "_"); + EnumName += Variety; + EnumName += "_"; if (!Namespace.empty()) - EnumName += (NormalizeNameForSpellingComparison(Namespace).str() + - "_"); + EnumName += NormalizeNameForSpellingComparison(Namespace).str() + "_"; EnumName += NormalizeNameForSpellingComparison(Spelling); // Even if the name is not unique, this spelling index corresponds to a @@ -1837,7 +1832,7 @@ CreateSemanticSpellings(const std::vector &Spellings, return Ret; } -void WriteSemanticSpellingSwitch(const std::string &VarName, +void WriteSemanticSpellingSwitch(StringRef VarName, const 
SemanticSpellingMap &Map, raw_ostream &OS) { OS << " switch (" << VarName << ") {\n default: " @@ -2378,30 +2373,22 @@ void PragmaClangAttributeSupport::generateParsingHelpers(raw_ostream &OS) { } template static void forEachSpelling(const Record &Attr, Fn &&F) { - std::vector Spellings = GetFlattenedSpellings(Attr); - for (const FlattenedSpelling &S : Spellings) { + for (const FlattenedSpelling &S : GetFlattenedSpellings(Attr)) { F(S); } } -std::map> NameToAttrsMap; +std::map> NameToAttrsMap; /// Build a map from the attribute name to the Attrs that use that name. If more /// than one Attr use a name, the arguments could be different so a more complex /// check is needed in the generated switch. void generateNameToAttrsMap(const RecordKeeper &Records) { for (const auto *A : Records.getAllDerivedDefinitions("Attr")) { - std::vector Spellings = GetFlattenedSpellings(*A); - for (const auto &S : Spellings) { - auto It = NameToAttrsMap.find(S.name()); - if (It != NameToAttrsMap.end()) { - if (none_of(It->second, [&](const Record *R) { return R == A; })) - It->second.emplace_back(A); - } else { - std::vector V; - V.emplace_back(A); - NameToAttrsMap.insert(std::make_pair(S.name(), V)); - } + for (const FlattenedSpelling &S : GetFlattenedSpellings(*A)) { + auto [It, Inserted] = NameToAttrsMap.try_emplace(S.name()); + if (Inserted || !is_contained(It->second, A)) + It->second.emplace_back(A); } } } @@ -2410,7 +2397,7 @@ void generateNameToAttrsMap(const RecordKeeper &Records) { /// attribute has the same name. Store the info in a map that can be processed /// after all attributes are seen. 
static void generateFlattenedSpellingInfo(const Record &Attr, - std::map &Map, + std::map &Map, uint32_t ArgMask = 0) { std::string TargetTest; if (Attr.isSubClassOf("TargetSpecificAttr") && @@ -2421,24 +2408,17 @@ static void generateFlattenedSpellingInfo(const Record &Attr, } forEachSpelling(Attr, [&](const FlattenedSpelling &S) { - auto It = Map.find(S.name()); - if (It != Map.end()) { - It->second.emplace_back(S.variety(), S.nameSpace(), TargetTest, ArgMask); - } else { - FSIVecTy V; - V.emplace_back(S.variety(), S.nameSpace(), TargetTest, ArgMask); - Map.insert(std::make_pair(S.name(), V)); - } + Map[S.name()].emplace_back(S.variety(), S.nameSpace(), TargetTest, ArgMask); }); } -static bool nameAppliesToOneAttribute(std::string Name) { +static bool nameAppliesToOneAttribute(StringRef Name) { auto It = NameToAttrsMap.find(Name); assert(It != NameToAttrsMap.end()); return It->second.size() == 1; } -static bool emitIfSimpleValue(std::string Name, uint32_t ArgMask, +static bool emitIfSimpleValue(StringRef Name, uint32_t ArgMask, raw_ostream &OS) { if (nameAppliesToOneAttribute(Name)) { OS << ".Case(\"" << Name << "\", "; @@ -2463,15 +2443,13 @@ static void emitSingleCondition(const FlattenedSpellingInfo &FSI, OS << ")"; } -static void emitStringSwitchCases(std::map &Map, +static void emitStringSwitchCases(std::map &Map, raw_ostream &OS) { - for (const auto &P : Map) { - if (emitIfSimpleValue(P.first, P.second[0].ArgMask, OS)) + for (const auto &[Name, Vec] : Map) { + if (emitIfSimpleValue(Name, Vec[0].ArgMask, OS)) continue; // Not simple, build expressions for each case. 
- StringRef Name = P.first; - const FSIVecTy &Vec = P.second; OS << ".Case(\"" << Name << "\", "; for (unsigned I = 0, E = Vec.size(); I < E; ++I) { emitSingleCondition(Vec[I], OS); @@ -2498,7 +2476,7 @@ static bool isTypeArgument(const Record *Arg) { static void emitClangAttrTypeArgList(const RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_TYPE_ARG_LIST)\n"; - std::map FSIMap; + std::map FSIMap; for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) { // Determine whether the first argument is a type. std::vector Args = Attr->getValueAsListOfDefs("Args"); @@ -2518,7 +2496,7 @@ static void emitClangAttrTypeArgList(const RecordKeeper &Records, static void emitClangAttrArgContextList(const RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_ARG_CONTEXT_LIST)\n"; - std::map FSIMap; + std::map FSIMap; ParsedAttrMap Attrs = getParsedAttrList(Records); for (const auto &I : Attrs) { const Record &Attr = *I.second; @@ -2576,7 +2554,7 @@ static bool isVariadicStringLiteralArgument(const Record *Arg) { static void emitClangAttrVariadicIdentifierArgList(const RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST)\n"; - std::map FSIMap; + std::map FSIMap; for (const auto *A : Records.getAllDerivedDefinitions("Attr")) { // Determine whether the first argument is a variadic identifier. std::vector Args = A->getValueAsListOfDefs("Args"); @@ -2609,7 +2587,7 @@ emitClangAttrUnevaluatedStringLiteralList(const RecordKeeper &Records, return Bits; }; - std::map FSIMap; + std::map FSIMap; for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) { // Determine whether there are any string arguments. 
uint32_t ArgMask = MakeMask(Attr->getValueAsListOfDefs("Args")); @@ -2625,7 +2603,7 @@ emitClangAttrUnevaluatedStringLiteralList(const RecordKeeper &Records, static void emitClangAttrIdentifierArgList(const RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_IDENTIFIER_ARG_LIST)\n"; - std::map FSIMap; + std::map FSIMap; for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) { // Determine whether the first argument is an identifier. std::vector Args = Attr->getValueAsListOfDefs("Args"); @@ -2641,7 +2619,7 @@ static void emitClangAttrIdentifierArgList(const RecordKeeper &Records, static void emitClangAttrStrictIdentifierArgList(const RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_STRICT_IDENTIFIER_ARG_LIST)\n"; - std::map FSIMap; + std::map FSIMap; for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) { if (!Attr->getValueAsBit("StrictEnumParameters")) continue; @@ -2665,7 +2643,7 @@ static bool keywordThisIsaIdentifierInArgument(const Record *Arg) { static void emitClangAttrThisIsaIdentifierArgList(const RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_THIS_ISA_IDENTIFIER_ARG_LIST)\n"; - std::map FSIMap; + std::map FSIMap; for (const auto *A : Records.getAllDerivedDefinitions("Attr")) { // Determine whether the first argument is a variadic identifier. 
std::vector Args = A->getValueAsListOfDefs("Args"); @@ -2681,7 +2659,7 @@ static void emitClangAttrAcceptsExprPack(const RecordKeeper &Records, raw_ostream &OS) { OS << "#if defined(CLANG_ATTR_ACCEPTS_EXPR_PACK)\n"; ParsedAttrMap Attrs = getParsedAttrList(Records); - std::map FSIMap; + std::map FSIMap; for (const auto &I : Attrs) { const Record &Attr = *I.second; @@ -3678,9 +3656,8 @@ static bool GenerateTargetSpecificAttrChecks(const Record *R, } static void GenerateHasAttrSpellingStringSwitch( - const std::vector> &Attrs, - raw_ostream &OS, const std::string &Variety, - const std::string &Scope = "") { + ArrayRef> Attrs, + raw_ostream &OS, StringRef Variety, StringRef Scope = "") { for (const auto &[Attr, Spelling] : Attrs) { // C++11-style attributes have specific version information associated with // them. If the attribute has no scope, the version information must not @@ -3783,18 +3760,15 @@ void EmitClangAttrHasAttrImpl(const RecordKeeper &Records, raw_ostream &OS) { // Separate all of the attributes out into four group: generic, C++11, GNU, // and declspecs. Then generate a big switch statement for each of them. - std::vector> Declspec, Microsoft, - GNU, Pragma, HLSLAnnotation; - std::map>> - CXX, C23; + using PairTy = std::pair; + std::vector Declspec, Microsoft, GNU, Pragma, HLSLAnnotation; + std::map> CXX, C23; // Walk over the list of all attributes, and split them out based on the // spelling variety. 
for (auto *R : Records.getAllDerivedDefinitions("Attr")) { - std::vector Spellings = GetFlattenedSpellings(*R); - for (const auto &SI : Spellings) { - const std::string &Variety = SI.variety(); + for (const FlattenedSpelling &SI : GetFlattenedSpellings(*R)) { + StringRef Variety = SI.variety(); if (Variety == "GNU") GNU.emplace_back(R, SI); else if (Variety == "Declspec") @@ -3829,22 +3803,16 @@ void EmitClangAttrHasAttrImpl(const RecordKeeper &Records, raw_ostream &OS) { OS << "case AttributeCommonInfo::Syntax::AS_HLSLAnnotation:\n"; OS << " return llvm::StringSwitch(Name)\n"; GenerateHasAttrSpellingStringSwitch(HLSLAnnotation, OS, "HLSLAnnotation"); - auto fn = [&OS](const char *Spelling, - const std::map< - std::string, - std::vector>> - &List) { + auto fn = [&OS](StringRef Spelling, + const std::map> &Map) { OS << "case AttributeCommonInfo::Syntax::AS_" << Spelling << ": {\n"; // C++11-style attributes are further split out based on the Scope. - for (auto I = List.cbegin(), E = List.cend(); I != E; ++I) { - if (I != List.cbegin()) - OS << " else "; - if (I->first.empty()) - OS << "if (ScopeName == \"\") {\n"; - else - OS << "if (ScopeName == \"" << I->first << "\") {\n"; + ListSeparator LS(" else "); + for (const auto &[Scope, List] : Map) { + OS << LS; + OS << "if (ScopeName == \"" << Scope << "\") {\n"; OS << " return llvm::StringSwitch(Name)\n"; - GenerateHasAttrSpellingStringSwitch(I->second, OS, Spelling, I->first); + GenerateHasAttrSpellingStringSwitch(List, OS, Spelling, Scope); OS << "}"; } OS << "\n} break;\n"; @@ -4424,7 +4392,7 @@ static void GenerateMutualExclusionsChecks(const Record &Attr, MergeStmtOS << " auto Iter = llvm::find_if(C, [](const Attr *Check) " << "{ return isa<"; interleave( - StmtAttrs, [&](const std::string &Name) { MergeStmtOS << Name; }, + StmtAttrs, [&](StringRef Name) { MergeStmtOS << Name; }, [&] { MergeStmtOS << ", "; }); MergeStmtOS << ">(Check); });\n"; MergeStmtOS << " if (Iter != C.end()) {\n"; @@ -4719,10 +4687,10 @@ 
void EmitClangAttrParsedAttrImpl(const RecordKeeper &Records, raw_ostream &OS) { OS << "static constexpr ParsedAttrInfo::Spelling " << I->first << "Spellings[] = {\n"; for (const auto &S : Spellings) { - const std::string &RawSpelling = S.name(); + StringRef RawSpelling = S.name(); std::string Spelling; if (!S.nameSpace().empty()) - Spelling += S.nameSpace() + "::"; + Spelling += S.nameSpace().str() + "::"; if (S.variety() == "GNU") Spelling += NormalizeGNUAttrSpelling(RawSpelling); else @@ -4841,7 +4809,7 @@ void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records, std::vector GNU, Declspec, Microsoft, CXX11, Keywords, Pragma, C23, HLSLAnnotation; - std::set Seen; + std::set Seen; for (const auto *A : Records.getAllDerivedDefinitions("Attr")) { const Record &Attr = *A; @@ -4861,38 +4829,41 @@ void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records, std::string AttrName; if (Attr.isSubClassOf("TargetSpecificAttr") && !Attr.isValueUnset("ParseKind")) { - AttrName = std::string(Attr.getValueAsString("ParseKind")); - if (!Seen.insert(AttrName).second) + StringRef ParseKind = Attr.getValueAsString("ParseKind"); + if (!Seen.insert(ParseKind).second) continue; - } else - AttrName = NormalizeAttrName(StringRef(Attr.getName())).str(); + AttrName = ParseKind.str(); + } else { + AttrName = NormalizeAttrName(Attr.getName()).str(); + } std::vector Spellings = GetFlattenedSpellings(Attr); for (const auto &S : Spellings) { - const std::string &RawSpelling = S.name(); + StringRef RawSpelling = S.name(); std::vector *Matches = nullptr; std::string Spelling; - const std::string &Variety = S.variety(); + StringRef Variety = S.variety(); if (Variety == "CXX11") { Matches = &CXX11; if (!S.nameSpace().empty()) - Spelling += S.nameSpace() + "::"; + Spelling += S.nameSpace().str() + "::"; } else if (Variety == "C23") { Matches = &C23; if (!S.nameSpace().empty()) - Spelling += S.nameSpace() + "::"; - } else if (Variety == "GNU") + Spelling += S.nameSpace().str() + "::"; + } 
else if (Variety == "GNU") { Matches = &GNU; - else if (Variety == "Declspec") + } else if (Variety == "Declspec") { Matches = &Declspec; - else if (Variety == "Microsoft") + } else if (Variety == "Microsoft") { Matches = &Microsoft; - else if (Variety == "Keyword") + } else if (Variety == "Keyword") { Matches = &Keywords; - else if (Variety == "Pragma") + } else if (Variety == "Pragma") { Matches = &Pragma; - else if (Variety == "HLSLAnnotation") + } else if (Variety == "HLSLAnnotation") { Matches = &HLSLAnnotation; + } assert(Matches && "Unsupported spelling variety found"); @@ -5073,14 +5044,16 @@ class SpellingList { .Case("Pragma", SpellingKind::Pragma) .Case("HLSLAnnotation", SpellingKind::HLSLAnnotation); std::string Name; - if (!Spelling.nameSpace().empty()) { + StringRef NameSpace = Spelling.nameSpace(); + if (!NameSpace.empty()) { + Name = NameSpace; switch (Kind) { case SpellingKind::CXX11: case SpellingKind::C23: - Name = Spelling.nameSpace() + "::"; + Name += "::"; break; case SpellingKind::Pragma: - Name = Spelling.nameSpace() + " "; + Name += " "; break; default: PrintFatalError(Attr.getLoc(), "Unexpected namespace in spelling"); From 6c6351ee350589c8e6bcd69c3255374a714d87d0 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Mon, 28 Oct 2024 11:47:59 -0700 Subject: [PATCH 205/425] [clang][modules] Optimize construction and usage of the submodule index (#113391) This patch avoids eagerly populating the submodule index on `Module` construction. The `StringMap` allocation shows up in my profiles of `clang-scan-deps`, while the index is not necessary most of the time. We still construct it on-demand. Moreover, this patch avoids performing qualified submodule lookup in `ASTReader` whenever we're serializing a module graph whose top-level module is unknown. This is pointless, since that's guaranteed to never find any existing submodules anyway. This speeds up `clang-scan-deps` by ~0.5% on my workload.
--- clang/include/clang/Basic/Module.h | 3 +-- clang/include/clang/Lex/ModuleMap.h | 13 +++++++----- clang/lib/Basic/Module.cpp | 12 ++++++----- clang/lib/Lex/ModuleMap.cpp | 29 ++++++++++++++++----------- clang/lib/Serialization/ASTReader.cpp | 14 ++++++++----- 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 1ab3b5e5f81567f..dd384c1d76c5fde 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -227,7 +227,7 @@ class alignas(8) Module { /// A mapping from the submodule name to the index into the /// \c SubModules vector at which that submodule resides. - llvm::StringMap SubModuleIndex; + mutable llvm::StringMap SubModuleIndex; /// The AST file if this is a top-level module which has a /// corresponding serialized AST file, or null otherwise. @@ -612,7 +612,6 @@ class alignas(8) Module { void setParent(Module *M) { assert(!Parent); Parent = M; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); Parent->SubModules.push_back(this); } diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 75b567a347cb6cf..5ee152e4213abf0 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -541,11 +541,14 @@ class ModuleMap { /// /// \param IsExplicit Whether this is an explicit submodule. /// - /// \returns The found or newly-created module, along with a boolean value - /// that will be true if the module is newly-created. - std::pair findOrCreateModule(StringRef Name, Module *Parent, - bool IsFramework, - bool IsExplicit); + /// \returns The found or newly-created module. + Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, + bool IsExplicit); + /// Create new submodule, assuming it does not exist. This function can only + /// be called when it is guaranteed that this submodule does not exist yet. 
+ /// The parameters have same semantics as \c ModuleMap::findOrCreateModule. + Module *createModule(StringRef Name, Module *Parent, bool IsFramework, + bool IsExplicit); /// Create a global module fragment for a C++ module unit. /// diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp index a7a3f6b37efef17..330108d5b3e47f6 100644 --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name, NoUndeclaredIncludes = Parent->NoUndeclaredIncludes; ModuleMapIsPrivate = Parent->ModuleMapIsPrivate; - Parent->SubModuleIndex[Name] = Parent->SubModules.size(); Parent->SubModules.push_back(this); } } @@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) { } Module *Module::findSubmodule(StringRef Name) const { - llvm::StringMap::const_iterator Pos = SubModuleIndex.find(Name); - if (Pos == SubModuleIndex.end()) - return nullptr; + // Add new submodules into the index. + for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I) + SubModuleIndex[SubModules[I]->Name] = I; - return SubModules[Pos->getValue()]; + if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end()) + return SubModules[It->second]; + + return nullptr; } Module *Module::getGlobalModuleFragment() const { diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 201ab91cf68ca16..10774429a2177b7 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(SkippedDir.getName()), NameBuf); - Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, - Explicit).first; + Result = + findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); // Associate the module and the directory. 
@@ -672,8 +672,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(File.getName()), NameBuf); - Result = findOrCreateModule(Name, Result, /*IsFramework=*/false, - Explicit).first; + Result = + findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); Result->addTopHeader(File); @@ -857,15 +857,21 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{ return Context->findSubmodule(Name); } -std::pair ModuleMap::findOrCreateModule(StringRef Name, - Module *Parent, - bool IsFramework, - bool IsExplicit) { +Module *ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { // Try to find an existing module with this name. if (Module *Sub = lookupModuleQualified(Name, Parent)) - return std::make_pair(Sub, false); + return Sub; // Create a new module with this name. 
+ return createModule(Name, Parent, IsFramework, IsExplicit); +} + +Module *ModuleMap::createModule(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { + assert(lookupModuleQualified(Name, Parent) == nullptr && + "Creating duplicate submodule"); + Module *Result = new (ModulesAlloc.Allocate()) Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent, IsFramework, IsExplicit, NumCreatedModules++); @@ -875,7 +881,7 @@ std::pair ModuleMap::findOrCreateModule(StringRef Name, Modules[Name] = Result; ModuleScopeIDs[Result] = CurrentModuleScopeID; } - return std::make_pair(Result, true); + return Result; } Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc, @@ -2124,8 +2130,7 @@ void ModuleMapParser::parseModuleDecl() { Map.createShadowedModule(ModuleName, Framework, ShadowingModule); } else { ActiveModule = - Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit) - .first; + Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit); } ActiveModule->DefinitionLoc = ModuleNameLoc; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2419ed84e68acf8..74a79ac54bb4eb2 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5756,6 +5756,13 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, return Err; ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap(); + bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr; + // If we don't know the top-level module, there's no point in doing qualified + // lookup of its submodules; it won't find anything anywhere within this tree. + // Let's skip that and avoid some string lookups. + auto CreateModule = !KnowsTopLevelModule ? 
&ModuleMap::createModule + : &ModuleMap::findOrCreateModule; + bool First = true; Module *CurrentModule = nullptr; RecordData Record; @@ -5829,11 +5836,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, if (Parent) ParentModule = getSubmodule(Parent); - // Retrieve this (sub)module from the module map, creating it if - // necessary. - CurrentModule = - ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit) - .first; + CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule, + IsFramework, IsExplicit); SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS; if (GlobalIndex >= SubmodulesLoaded.size() || From 5a5b78a84e7214796410265139ecf0266b1fd216 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 28 Oct 2024 18:51:10 +0000 Subject: [PATCH 206/425] [AArch64][GlobalISel] Lower aarch64.neon.smull/umull intrinsics. As with other nodes, we can convert these into G_UMULL and G_SMULL aarch64 instructions. --- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 46 ++- llvm/test/CodeGen/AArch64/aarch64-smull.ll | 313 +++++++++++++----- 2 files changed, 258 insertions(+), 101 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index dd65dbe594a6345..6024027afaf6ce9 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1536,6 +1536,14 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue( bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { + auto LowerBinOp = [&MI](unsigned Opcode) { + MachineIRBuilder MIB(MI); + MIB.buildInstr(Opcode, {MI.getOperand(0)}, + {MI.getOperand(2), MI.getOperand(3)}); + MI.eraseFromParent(); + return true; + }; + Intrinsic::ID IntrinsicID = cast(MI).getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::vacopy: { @@ -1675,37 +1683,25 @@ bool 
AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, return true; } case Intrinsic::aarch64_neon_smax: + return LowerBinOp(TargetOpcode::G_SMAX); case Intrinsic::aarch64_neon_smin: + return LowerBinOp(TargetOpcode::G_SMIN); case Intrinsic::aarch64_neon_umax: + return LowerBinOp(TargetOpcode::G_UMAX); case Intrinsic::aarch64_neon_umin: + return LowerBinOp(TargetOpcode::G_UMIN); case Intrinsic::aarch64_neon_fmax: + return LowerBinOp(TargetOpcode::G_FMAXIMUM); case Intrinsic::aarch64_neon_fmin: + return LowerBinOp(TargetOpcode::G_FMINIMUM); case Intrinsic::aarch64_neon_fmaxnm: - case Intrinsic::aarch64_neon_fminnm: { - MachineIRBuilder MIB(MI); - if (IntrinsicID == Intrinsic::aarch64_neon_smax) - MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); - else if (IntrinsicID == Intrinsic::aarch64_neon_smin) - MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); - else if (IntrinsicID == Intrinsic::aarch64_neon_umax) - MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); - else if (IntrinsicID == Intrinsic::aarch64_neon_umin) - MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3)); - else if (IntrinsicID == Intrinsic::aarch64_neon_fmax) - MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)}, - {MI.getOperand(2), MI.getOperand(3)}); - else if (IntrinsicID == Intrinsic::aarch64_neon_fmin) - MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)}, - {MI.getOperand(2), MI.getOperand(3)}); - else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm) - MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)}, - {MI.getOperand(2), MI.getOperand(3)}); - else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm) - MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)}, - {MI.getOperand(2), MI.getOperand(3)}); - MI.eraseFromParent(); - return true; - } + return LowerBinOp(TargetOpcode::G_FMAXNUM); + case Intrinsic::aarch64_neon_fminnm: + return LowerBinOp(TargetOpcode::G_FMINNUM); + case 
Intrinsic::aarch64_neon_smull: + return LowerBinOp(AArch64::G_SMULL); + case Intrinsic::aarch64_neon_umull: + return LowerBinOp(AArch64::G_UMULL); case Intrinsic::vector_reverse: // TODO: Add support for vector_reverse return false; diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index d677526bab00050..11397703b4442e8 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -4,16 +4,7 @@ ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v4i32_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl2_v4i32_uzp1 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl_smlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl_umlsl2_v8i16_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl_smlsl2_v4i32_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl_umlsl2_v4i32_uzp1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for do_stuff define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull_v8i8_v8i16: @@ -2025,13 +2016,30 @@ define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { } define void @smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { -; CHECK-LABEL: smlsl2_v8i16_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2,
[x1, #16] -; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b -; CHECK-NEXT: smlsl2 v1.8h, v0.16b, v2.16b -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-NEON-NEXT: smlsl2 v1.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-SVE-NEXT: smlsl2 v1.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: smlsl v1.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: str q1, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <8 x i16>, ptr %5, align 4 %7 = trunc <8 x i16> %6 to <8 x i8> @@ -2043,13 +2051,30 @@ define void @smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { } define void @umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { -; CHECK-LABEL: umlsl2_v8i16_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.16b, v0.16b, v2.16b -; CHECK-NEXT: umlsl2 v1.8h, v0.16b, v2.16b -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-NEON-NEXT: umlsl2 v1.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.16b, v0.16b, v2.16b +; CHECK-SVE-NEXT: umlsl2 v1.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umlsl2_v8i16_uzp1: +;
CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: umlsl v1.8h, v0.8b, v2.8b +; CHECK-GI-NEXT: str q1, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <8 x i16>, ptr %5, align 4 %7 = trunc <8 x i16> %6 to <8 x i8> @@ -2061,13 +2086,30 @@ define void @umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) { } define void @smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { -; CHECK-LABEL: smlsl2_v4i32_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.8h, v0.8h, v2.8h -; CHECK-NEXT: smlsl2 v1.4s, v0.8h, v2.8h -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smlsl2_v4i32_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-NEON-NEXT: smlsl2 v1.4s, v0.8h, v2.8h +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smlsl2_v4i32_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-SVE-NEXT: smlsl2 v1.4s, v0.8h, v2.8h +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smlsl2_v4i32_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.4h, v2.4s +; CHECK-GI-NEXT: smlsl v1.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: str q1, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <4 x i32>, ptr %5, align 4 %7 = trunc <4 x i32> %6 to <4 x i16> @@ -2079,13 +2121,30 @@ define void @smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { } define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) { -; CHECK-LABEL: umlsl2_v4i32_uzp1: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q2, [x1, #16] -; CHECK-NEXT: uzp1 v2.8h, v0.8h, v2.8h -; CHECK-NEXT: umlsl2 v1.4s, v0.8h, v2.8h -; CHECK-NEXT: str q1,
[x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umlsl2_v4i32_uzp1: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: ldr q2, [x1, #16] +; CHECK-NEON-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-NEON-NEXT: umlsl2 v1.4s, v0.8h, v2.8h +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umlsl2_v4i32_uzp1: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: ldr q2, [x1, #16] +; CHECK-SVE-NEXT: uzp1 v2.8h, v0.8h, v2.8h +; CHECK-SVE-NEXT: umlsl2 v1.4s, v0.8h, v2.8h +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umlsl2_v4i32_uzp1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q2, [x1, #16] +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: xtn v2.4h, v2.4s +; CHECK-GI-NEXT: umlsl v1.4s, v0.4h, v2.4h +; CHECK-GI-NEXT: str q1, [x0] +; CHECK-GI-NEXT: ret %5 = getelementptr inbounds i32, ptr %3, i64 4 %6 = load <4 x i32>, ptr %5, align 4 %7 = trunc <4 x i32> %6 to <4 x i16> @@ -2124,14 +2183,35 @@ entry: } define void @smlsl_smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: smlsl_smlsl2_v8i16_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b -; CHECK-NEXT: smlsl v1.8h, v0.8b, v2.8b -; CHECK-NEXT: smlsl2 v1.8h, v0.16b, v2.16b -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smlsl_smlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEON-NEXT: smlsl v1.8h, v0.8b, v2.8b +; CHECK-NEON-NEXT: smlsl2 v1.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smlsl_smlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-SVE-NEXT: smlsl v1.8h, v0.8b, v2.8b +; CHECK-SVE-NEXT: smlsl2 v1.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smlsl_smlsl2_v8i16_uzp1: +;
CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q4, q2, [x1] +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: xtn v4.8b, v4.8h +; CHECK-GI-NEXT: smull v2.8h, v3.8b, v2.8b +; CHECK-GI-NEXT: smlal v2.8h, v0.8b, v4.8b +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <8 x i16>, ptr %3, align 4 %6 = trunc <8 x i16> %5 to <8 x i8> @@ -2149,14 +2229,35 @@ entry: } define void @umlsl_umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: umlsl_umlsl2_v8i16_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.16b, v2.16b, v3.16b -; CHECK-NEXT: umlsl v1.8h, v0.8b, v2.8b -; CHECK-NEXT: umlsl2 v1.8h, v0.16b, v2.16b -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umlsl_umlsl2_v8i16_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-NEON-NEXT: umlsl v1.8h, v0.8b, v2.8b +; CHECK-NEON-NEXT: umlsl2 v1.8h, v0.16b, v2.16b +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umlsl_umlsl2_v8i16_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.16b, v2.16b, v3.16b +; CHECK-SVE-NEXT: umlsl v1.8h, v0.8b, v2.8b +; CHECK-SVE-NEXT: umlsl2 v1.8h, v0.16b, v2.16b +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umlsl_umlsl2_v8i16_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q4, q2, [x1] +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: xtn v2.8b, v2.8h +; CHECK-GI-NEXT: xtn v4.8b, v4.8h +; CHECK-GI-NEXT: umull v2.8h, v3.8b, v2.8b +; CHECK-GI-NEXT: umlal v2.8h, v0.8b, v4.8b +; CHECK-GI-NEXT: sub v0.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <8 x i16>, ptr %3, align 4 %6 = trunc <8 x i16> %5 to <8 x i8> @@ -2174,14 +2275,35 @@ entry: } define void
@smlsl_smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: smlsl_smlsl2_v4i32_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-NEXT: smlsl v1.4s, v0.4h, v2.4h -; CHECK-NEXT: smlsl2 v1.4s, v0.8h, v2.8h -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: smlsl_smlsl2_v4i32_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEON-NEXT: smlsl v1.4s, v0.4h, v2.4h +; CHECK-NEON-NEXT: smlsl2 v1.4s, v0.8h, v2.8h +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: smlsl_smlsl2_v4i32_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-SVE-NEXT: smlsl v1.4s, v0.4h, v2.4h +; CHECK-SVE-NEXT: smlsl2 v1.4s, v0.8h, v2.8h +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: smlsl_smlsl2_v4i32_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q4, q2, [x1] +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: xtn v2.4h, v2.4s +; CHECK-GI-NEXT: xtn v4.4h, v4.4s +; CHECK-GI-NEXT: smull v2.4s, v3.4h, v2.4h +; CHECK-GI-NEXT: smlal v2.4s, v0.4h, v4.4h +; CHECK-GI-NEXT: sub v0.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <4 x i32>, ptr %3, align 4 %6 = trunc <4 x i32> %5 to <4 x i16> @@ -2199,14 +2321,35 @@ entry: } define void @umlsl_umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3, i32 %4) { -; CHECK-LABEL: umlsl_umlsl2_v4i32_uzp1: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-NEXT: umlsl v1.4s, v0.4h, v2.4h -; CHECK-NEXT: umlsl2 v1.4s, v0.8h, v2.8h -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: umlsl_umlsl2_v4i32_uzp1: +; CHECK-NEON: // %bb.0: // %entry +; CHECK-NEON-NEXT: ldp q2, q3, [x1] +; CHECK-NEON-NEXT: uzp1
v2.8h, v2.8h, v3.8h +; CHECK-NEON-NEXT: umlsl v1.4s, v0.4h, v2.4h +; CHECK-NEON-NEXT: umlsl2 v1.4s, v0.8h, v2.8h +; CHECK-NEON-NEXT: str q1, [x0] +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: umlsl_umlsl2_v4i32_uzp1: +; CHECK-SVE: // %bb.0: // %entry +; CHECK-SVE-NEXT: ldp q2, q3, [x1] +; CHECK-SVE-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-SVE-NEXT: umlsl v1.4s, v0.4h, v2.4h +; CHECK-SVE-NEXT: umlsl2 v1.4s, v0.8h, v2.8h +; CHECK-SVE-NEXT: str q1, [x0] +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: umlsl_umlsl2_v4i32_uzp1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldp q4, q2, [x1] +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: xtn v2.4h, v2.4s +; CHECK-GI-NEXT: xtn v4.4h, v4.4s +; CHECK-GI-NEXT: umull v2.4s, v3.4h, v2.4h +; CHECK-GI-NEXT: umlal v2.4s, v0.4h, v4.4h +; CHECK-GI-NEXT: sub v0.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: str q0, [x0] +; CHECK-GI-NEXT: ret entry: %5 = load <4 x i32>, ptr %3, align 4 %6 = trunc <4 x i32> %5 to <4 x i16> @@ -2224,13 +2367,31 @@ entry: } define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) { -; CHECK-LABEL: do_stuff: -; CHECK: // %bb.0: -; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: smull2 v0.2d, v1.4s, v0.4s -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ret +; CHECK-NEON-LABEL: do_stuff: +; CHECK-NEON: // %bb.0: +; CHECK-NEON-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEON-NEXT: smull2 v0.2d, v1.4s, v0.4s +; CHECK-NEON-NEXT: xtn v0.2s, v0.2d +; CHECK-NEON-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEON-NEXT: ret +; +; CHECK-SVE-LABEL: do_stuff: +; CHECK-SVE: // %bb.0: +; CHECK-SVE-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-SVE-NEXT: smull2 v0.2d, v1.4s, v0.4s +; CHECK-SVE-NEXT: xtn v0.2s, v0.2d +; CHECK-SVE-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SVE-NEXT: ret +; +; CHECK-GI-LABEL: do_stuff: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: xtn v0.2s, v0.2d +; CHECK-GI-NEXT: ext v2.16b, v1.16b, v2.16b, #8 +; CHECK-GI-NEXT: smull v0.2d, v2.2s,
v0.2s +; CHECK-GI-NEXT: xtn v0.2s, v0.2d +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %bc.1 = bitcast <2 x i64> %1 to <4 x i32> %trunc.0 = trunc <2 x i64> %0 to <2 x i32> %shuff.hi = shufflevector <4 x i32> %bc.1, <4 x i32> zeroinitializer, <2 x i32> From b54bc104ea87e301816b450ee117d2d864c7d82d Mon Sep 17 00:00:00 2001 From: jimingham Date: Mon, 28 Oct 2024 11:52:32 -0700 Subject: [PATCH 207/425] =?UTF-8?q?Revert=20"Add=20the=20ability=20to=20br?= =?UTF-8?q?eak=20on=20call-site=20locations,=20improve=20inli=E2=80=A6=20(?= =?UTF-8?q?#113947)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ne stepping (#112939)" This was breaking some gdb-remote packet counting tests on the bots. I can't see how this patch could cause that breakage, but I'm reverting to figure that out. This reverts commit f14743794587db102c6d1b20f9c87a1ac20decfd. --- .../lldb/Breakpoint/BreakpointLocation.h | 36 ---- lldb/include/lldb/Breakpoint/BreakpointSite.h | 5 - lldb/include/lldb/Core/Declaration.h | 6 +- lldb/include/lldb/Target/StopInfo.h | 12 -- .../lldb/Target/ThreadPlanStepInRange.h | 4 +- lldb/source/Breakpoint/BreakpointLocation.cpp | 63 +------ lldb/source/Breakpoint/BreakpointResolver.cpp | 15 -- lldb/source/Breakpoint/BreakpointSite.cpp | 17 -- lldb/source/Core/Declaration.cpp | 5 +- lldb/source/Symbol/Block.cpp | 2 +- lldb/source/Symbol/CompileUnit.cpp | 111 +----------- lldb/source/Target/StackFrameList.cpp | 171 ++++++++++++------ lldb/source/Target/StopInfo.cpp | 55 ------ lldb/source/Target/Thread.cpp | 8 - lldb/source/Target/ThreadPlanStepInRange.cpp | 24 +-- .../source/Target/ThreadPlanStepOverRange.cpp | 2 +- .../inline-stepping/TestInlineStepping.py | 63 ------- .../inline-stepping/calling.cpp | 25 --- 18 files changed, 131 insertions(+), 493 deletions(-) diff --git a/lldb/include/lldb/Breakpoint/BreakpointLocation.h b/lldb/include/lldb/Breakpoint/BreakpointLocation.h index 3592291bb2d06e9..cca00335bc3c67d 
100644 --- a/lldb/include/lldb/Breakpoint/BreakpointLocation.h +++ b/lldb/include/lldb/Breakpoint/BreakpointLocation.h @@ -11,12 +11,10 @@ #include #include -#include #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Breakpoint/StoppointHitCounter.h" #include "lldb/Core/Address.h" -#include "lldb/Symbol/LineEntry.h" #include "lldb/Utility/UserID.h" #include "lldb/lldb-private.h" @@ -284,25 +282,6 @@ class BreakpointLocation /// Returns the breakpoint location ID. lldb::break_id_t GetID() const { return m_loc_id; } - /// Set the line entry that should be shown to users for this location. - /// It is up to the caller to verify that this is a valid entry to show. - /// The current use of this is to distinguish among line entries from a - /// virtual inlined call stack that all share the same address. - /// The line entry must have the same start address as the address for this - /// location. - bool SetPreferredLineEntry(const LineEntry &line_entry) { - if (m_address == line_entry.range.GetBaseAddress()) { - m_preferred_line_entry = line_entry; - return true; - } - assert(0 && "Tried to set a preferred line entry with a different address"); - return false; - } - - const std::optional GetPreferredLineEntry() { - return m_preferred_line_entry; - } - protected: friend class BreakpointSite; friend class BreakpointLocationList; @@ -327,16 +306,6 @@ class BreakpointLocation /// If it returns false we should continue, otherwise stop. bool IgnoreCountShouldStop(); - /// If this location knows that the virtual stack frame it represents is - /// not frame 0, return the suggested stack frame instead. This will happen - /// when the location's address contains a "virtual inlined call stack" and - /// the breakpoint was set on a file & line that are not at the bottom of that - /// stack. For now we key off the "preferred line entry" - looking for that - /// in the blocks that start with the stop PC. 
- /// This version of the API doesn't take an "inlined" parameter because it - /// only changes frames in the inline stack. - std::optional GetSuggestedStackFrameIndex(); - private: void SwapLocation(lldb::BreakpointLocationSP swap_from); @@ -400,11 +369,6 @@ class BreakpointLocation lldb::break_id_t m_loc_id; ///< Breakpoint location ID. StoppointHitCounter m_hit_counter; ///< Number of times this breakpoint /// location has been hit. - /// If this exists, use it to print the stop description rather than the - /// LineEntry m_address resolves to directly. Use this for instance when the - /// location was given somewhere in the virtual inlined call stack since the - /// Address always resolves to the lowest entry in the stack. - std::optional m_preferred_line_entry; void SetShouldResolveIndirectFunctions(bool do_resolve) { m_should_resolve_indirect_functions = do_resolve; diff --git a/lldb/include/lldb/Breakpoint/BreakpointSite.h b/lldb/include/lldb/Breakpoint/BreakpointSite.h index 7b3f7be23639f27..17b76d51c1ae53a 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointSite.h +++ b/lldb/include/lldb/Breakpoint/BreakpointSite.h @@ -170,11 +170,6 @@ class BreakpointSite : public std::enable_shared_from_this, /// \see lldb::DescriptionLevel void GetDescription(Stream *s, lldb::DescriptionLevel level); - // This runs through all the breakpoint locations owning this site and returns - // the greatest of their suggested stack frame indexes. This only handles - // inlined stack changes. - std::optional GetSuggestedStackFrameIndex(); - /// Tell whether a breakpoint has a location at this site. /// /// \param[in] bp_id diff --git a/lldb/include/lldb/Core/Declaration.h b/lldb/include/lldb/Core/Declaration.h index c864b88c6b32a37..4a0e9047b54695e 100644 --- a/lldb/include/lldb/Core/Declaration.h +++ b/lldb/include/lldb/Core/Declaration.h @@ -84,14 +84,10 @@ class Declaration { /// \param[in] declaration /// The const Declaration object to compare with. 
/// - /// \param[in] full - /// Same meaning as Full in FileSpec::Equal. True means an empty - /// directory is not equal to a specified one, false means it is equal. - /// /// \return /// Returns \b true if \b declaration is at the same file and /// line, \b false otherwise. - bool FileAndLineEqual(const Declaration &declaration, bool full) const; + bool FileAndLineEqual(const Declaration &declaration) const; /// Dump a description of this object to a Stream. /// diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h index 45beac129e86f7a..fae90364deaf0a3 100644 --- a/lldb/include/lldb/Target/StopInfo.h +++ b/lldb/include/lldb/Target/StopInfo.h @@ -77,18 +77,6 @@ class StopInfo : public std::enable_shared_from_this { m_description.clear(); } - /// This gives the StopInfo a chance to suggest a stack frame to select. - /// Passing true for inlined_stack will request changes to the inlined - /// call stack. Passing false will request changes to the real stack - /// frame. The inlined stack gets adjusted before we call into the thread - /// plans so they can reason based on the correct values. The real stack - /// adjustment is handled after the frame recognizers get a chance to adjust - /// the frame. - virtual std::optional - GetSuggestedStackFrameIndex(bool inlined_stack) { - return {}; - } - virtual bool IsValidForOperatingSystemThread(Thread &thread) { return true; } /// A Continue operation can result in a false stop event diff --git a/lldb/include/lldb/Target/ThreadPlanStepInRange.h b/lldb/include/lldb/Target/ThreadPlanStepInRange.h index 9da8370ef1c9250..f9ef87942a7c03d 100644 --- a/lldb/include/lldb/Target/ThreadPlanStepInRange.h +++ b/lldb/include/lldb/Target/ThreadPlanStepInRange.h @@ -80,8 +80,8 @@ class ThreadPlanStepInRange : public ThreadPlanStepRange, bool m_step_past_prologue; // FIXME: For now hard-coded to true, we could put // a switch in for this if there's // demand for that. 
- LazyBool m_virtual_step; // true if we've just done a "virtual step", i.e. - // just moved the inline stack depth. + bool m_virtual_step; // true if we've just done a "virtual step", i.e. just + // moved the inline stack depth. ConstString m_step_into_target; ThreadPlanStepInRange(const ThreadPlanStepInRange &) = delete; const ThreadPlanStepInRange & diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index c7ea50407ae1c77..ad9057c8141e99b 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -508,20 +508,8 @@ void BreakpointLocation::GetDescription(Stream *s, s->PutCString("re-exported target = "); else s->PutCString("where = "); - - // If there's a preferred line entry for printing, use that. - bool show_function_info = true; - if (auto preferred = GetPreferredLineEntry()) { - sc.line_entry = *preferred; - // FIXME: We're going to get the function name wrong when the preferred - // line entry is not the lowest one. For now, just leave the function - // out in this case, but we really should also figure out how to easily - // fake the function name here. 
- show_function_info = false; - } sc.DumpStopContext(s, m_owner.GetTarget().GetProcessSP().get(), m_address, - false, true, false, show_function_info, - show_function_info, show_function_info); + false, true, false, true, true, true); } else { if (sc.module_sp) { s->EOL(); @@ -549,10 +537,7 @@ void BreakpointLocation::GetDescription(Stream *s, if (sc.line_entry.line > 0) { s->EOL(); s->Indent("location = "); - if (auto preferred = GetPreferredLineEntry()) - preferred->DumpStopContext(s, true); - else - sc.line_entry.DumpStopContext(s, true); + sc.line_entry.DumpStopContext(s, true); } } else { @@ -671,50 +656,6 @@ void BreakpointLocation::SendBreakpointLocationChangedEvent( } } -std::optional BreakpointLocation::GetSuggestedStackFrameIndex() { - auto preferred_opt = GetPreferredLineEntry(); - if (!preferred_opt) - return {}; - LineEntry preferred = *preferred_opt; - SymbolContext sc; - if (!m_address.CalculateSymbolContext(&sc)) - return {}; - // Don't return anything special if frame 0 is the preferred line entry. - // We not really telling the stack frame list to do anything special in that - // case. - if (!LineEntry::Compare(sc.line_entry, preferred)) - return {}; - - if (!sc.block) - return {}; - - // Blocks have their line info in Declaration form, so make one here: - Declaration preferred_decl(preferred.GetFile(), preferred.line, - preferred.column); - - uint32_t depth = 0; - Block *inlined_block = sc.block->GetContainingInlinedBlock(); - while (inlined_block) { - // If we've moved to a block that this isn't the start of, that's not - // our inlining info or call site, so we can stop here. 
- Address start_address; - if (!inlined_block->GetStartAddress(start_address) || - start_address != m_address) - return {}; - - const InlineFunctionInfo *info = inlined_block->GetInlinedFunctionInfo(); - if (info) { - if (preferred_decl == info->GetDeclaration()) - return depth; - if (preferred_decl == info->GetCallSite()) - return depth + 1; - } - inlined_block = inlined_block->GetInlinedParent(); - depth++; - } - return {}; -} - void BreakpointLocation::SwapLocation(BreakpointLocationSP swap_from) { m_address = swap_from->m_address; m_should_resolve_indirect_functions = diff --git a/lldb/source/Breakpoint/BreakpointResolver.cpp b/lldb/source/Breakpoint/BreakpointResolver.cpp index 9643602d78c751d..8307689c7640cfe 100644 --- a/lldb/source/Breakpoint/BreakpointResolver.cpp +++ b/lldb/source/Breakpoint/BreakpointResolver.cpp @@ -340,21 +340,6 @@ void BreakpointResolver::AddLocation(SearchFilter &filter, } BreakpointLocationSP bp_loc_sp(AddLocation(line_start)); - // If the address that we resolved the location to returns a different - // LineEntry from the one in the incoming SC, we're probably dealing with an - // inlined call site, so set that as the preferred LineEntry: - LineEntry resolved_entry; - if (!skipped_prologue && bp_loc_sp && - line_start.CalculateSymbolContextLineEntry(resolved_entry) && - LineEntry::Compare(resolved_entry, sc.line_entry)) { - // FIXME: The function name will also be wrong here. Do we need to record - // that as well, or can we figure that out again when we report this - // breakpoint location. 
- if (!bp_loc_sp->SetPreferredLineEntry(sc.line_entry)) { - LLDB_LOG(log, "Tried to add a preferred line entry that didn't have the " - "same address as this location's address."); - } - } if (log && bp_loc_sp && !GetBreakpoint()->IsInternal()) { StreamString s; bp_loc_sp->GetDescription(&s, lldb::eDescriptionLevelVerbose); diff --git a/lldb/source/Breakpoint/BreakpointSite.cpp b/lldb/source/Breakpoint/BreakpointSite.cpp index 9700a57d3346e0b..3ca93f908e30b8b 100644 --- a/lldb/source/Breakpoint/BreakpointSite.cpp +++ b/lldb/source/Breakpoint/BreakpointSite.cpp @@ -87,23 +87,6 @@ void BreakpointSite::GetDescription(Stream *s, lldb::DescriptionLevel level) { m_constituents.GetDescription(s, level); } -std::optional BreakpointSite::GetSuggestedStackFrameIndex() { - - std::optional result; - std::lock_guard guard(m_constituents_mutex); - for (BreakpointLocationSP loc_sp : m_constituents.BreakpointLocations()) { - std::optional loc_frame_index = - loc_sp->GetSuggestedStackFrameIndex(); - if (loc_frame_index) { - if (result) - result = std::max(*loc_frame_index, *result); - else - result = loc_frame_index; - } - } - return result; -} - bool BreakpointSite::IsInternal() const { return m_constituents.IsInternal(); } uint8_t *BreakpointSite::GetTrapOpcodeBytes() { return &m_trap_opcode[0]; } diff --git a/lldb/source/Core/Declaration.cpp b/lldb/source/Core/Declaration.cpp index a485c4b9ba48a7d..579a3999d14ea09 100644 --- a/lldb/source/Core/Declaration.cpp +++ b/lldb/source/Core/Declaration.cpp @@ -70,9 +70,8 @@ int Declaration::Compare(const Declaration &a, const Declaration &b) { return 0; } -bool Declaration::FileAndLineEqual(const Declaration &declaration, - bool full) const { - int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, full); +bool Declaration::FileAndLineEqual(const Declaration &declaration) const { + int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, true); return file_compare == 0 && this->m_line == declaration.m_line; 
} diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index 5c7772a6db780d3..f7d9c0d2d330656 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -230,7 +230,7 @@ Block *Block::GetContainingInlinedBlockWithCallSite( const auto *function_info = inlined_block->GetInlinedFunctionInfo(); if (function_info && - function_info->GetCallSite().FileAndLineEqual(find_call_site, true)) + function_info->GetCallSite().FileAndLineEqual(find_call_site)) return inlined_block; inlined_block = inlined_block->GetInlinedParent(); } diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index f0f7e40ae70d832..db8f8ce6bcbc923 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -251,10 +251,7 @@ void CompileUnit::ResolveSymbolContext( SymbolContextItem resolve_scope, SymbolContextList &sc_list, RealpathPrefixes *realpath_prefixes) { const FileSpec file_spec = src_location_spec.GetFileSpec(); - const uint32_t line = - src_location_spec.GetLine().value_or(LLDB_INVALID_LINE_NUMBER); - const uint32_t column_num = - src_location_spec.GetColumn().value_or(LLDB_INVALID_COLUMN_NUMBER); + const uint32_t line = src_location_spec.GetLine().value_or(0); const bool check_inlines = src_location_spec.GetCheckInlines(); // First find all of the file indexes that match our "file_spec". If @@ -315,112 +312,6 @@ void CompileUnit::ResolveSymbolContext( 0, file_indexes, src_location_spec, &line_entry); } - // If we didn't manage to find a breakpoint that matched the line number - // requested, that might be because it is only an inline call site, and - // doesn't have a line entry in the line table. Scan for that here. - // - // We are making the assumption that if there was an inlined function it will - // contribute at least 1 non-call-site entry to the line table. 
That's handy - // because we don't move line breakpoints over function boundaries, so if we - // found a hit, and there were also a call site entry, it would have to be in - // the function containing the PC of the line table match. That way we can - // limit the call site search to that function. - // We will miss functions that ONLY exist as a call site entry. - - if (line_entry.IsValid() && - (line_entry.line != line || line_entry.column != column_num) && - resolve_scope & eSymbolContextLineEntry && check_inlines) { - // We don't move lines over function boundaries, so the address in the - // line entry will be the in function that contained the line that might - // be a CallSite, and we can just iterate over that function to find any - // inline records, and dig up their call sites. - Address start_addr = line_entry.range.GetBaseAddress(); - Function *function = start_addr.CalculateSymbolContextFunction(); - - Declaration sought_decl(file_spec, line, column_num); - // We use this recursive function to descend the block structure looking - // for a block that has this Declaration as in it's CallSite info. - // This function recursively scans the sibling blocks of the incoming - // block parameter. - std::function examine_block = - [&sought_decl, &sc_list, &src_location_spec, resolve_scope, - &examine_block](Block &block) -> void { - // Iterate over the sibling child blocks of the incoming block. - Block *sibling_block = block.GetFirstChild(); - while (sibling_block) { - // We only have to descend through the regular blocks, looking for - // immediate inlines, since those are the only ones that will have this - // callsite. - const InlineFunctionInfo *inline_info = - sibling_block->GetInlinedFunctionInfo(); - if (inline_info) { - // If this is the call-site we are looking for, record that: - // We need to be careful because the call site from the debug info - // will generally have a column, but the user might not have specified - // it. 
- Declaration found_decl = inline_info->GetCallSite(); - uint32_t sought_column = sought_decl.GetColumn(); - if (found_decl.FileAndLineEqual(sought_decl, false) && - (sought_column == LLDB_INVALID_COLUMN_NUMBER || - sought_column == found_decl.GetColumn())) { - // If we found a call site, it belongs not in this inlined block, - // but in the parent block that inlined it. - Address parent_start_addr; - if (sibling_block->GetParent()->GetStartAddress( - parent_start_addr)) { - SymbolContext sc; - parent_start_addr.CalculateSymbolContext(&sc, resolve_scope); - // Now swap out the line entry for the one we found. - LineEntry call_site_line = sc.line_entry; - call_site_line.line = found_decl.GetLine(); - call_site_line.column = found_decl.GetColumn(); - bool matches_spec = true; - // If the user asked for an exact match, we need to make sure the - // call site we found actually matches the location. - if (src_location_spec.GetExactMatch()) { - matches_spec = false; - if ((src_location_spec.GetFileSpec() == - sc.line_entry.GetFile()) && - (src_location_spec.GetLine() && - *src_location_spec.GetLine() == call_site_line.line) && - (src_location_spec.GetColumn() && - *src_location_spec.GetColumn() == call_site_line.column)) - matches_spec = true; - } - if (matches_spec && - sibling_block->GetRangeAtIndex(0, call_site_line.range)) { - SymbolContext call_site_sc(sc.target_sp, sc.module_sp, - sc.comp_unit, sc.function, sc.block, - &call_site_line, sc.symbol); - sc_list.Append(call_site_sc); - } - } - } - } - - // Descend into the child blocks: - examine_block(*sibling_block); - // Now go to the next sibling: - sibling_block = sibling_block->GetSibling(); - } - }; - - if (function) { - // We don't need to examine the function block, it can't be inlined. - Block &func_block = function->GetBlock(true); - examine_block(func_block); - } - // If we found entries here, we are done. 
We only get here because we - // didn't find an exact line entry for this line & column, but if we found - // an exact match from the call site info that's strictly better than - // continuing to look for matches further on in the file. - // FIXME: Should I also do this for "call site line exists between the - // given line number and the later line we found in the line table"? That's - // a closer approximation to our general sliding algorithm. - if (sc_list.GetSize()) - return; - } - // If "exact == true", then "found_line" will be the same as "line". If // "exact == false", the "found_line" will be the closest line entry // with a line number greater than "line" and we will use this for our diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 94a381edd5e2027..3849ec5ed178d9a 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -85,32 +85,121 @@ void StackFrameList::ResetCurrentInlinedDepth() { return; std::lock_guard guard(m_mutex); + + GetFramesUpTo(0, DoNotAllowInterruption); + if (m_frames.empty()) + return; + if (!m_frames[0]->IsInlined()) { + m_current_inlined_depth = UINT32_MAX; + m_current_inlined_pc = LLDB_INVALID_ADDRESS; + Log *log = GetLog(LLDBLog::Step); + if (log && log->GetVerbose()) + LLDB_LOGF( + log, + "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); + return; + } - m_current_inlined_pc = LLDB_INVALID_ADDRESS; - m_current_inlined_depth = UINT32_MAX; + // We only need to do something special about inlined blocks when we are + // at the beginning of an inlined function: + // FIXME: We probably also have to do something special if the PC is at + // the END of an inlined function, which coincides with the end of either + // its containing function or another inlined function. 
+ + Block *block_ptr = m_frames[0]->GetFrameBlock(); + if (!block_ptr) + return; + Address pc_as_address; + lldb::addr_t curr_pc = m_thread.GetRegisterContext()->GetPC(); + pc_as_address.SetLoadAddress(curr_pc, &(m_thread.GetProcess()->GetTarget())); + AddressRange containing_range; + if (!block_ptr->GetRangeContainingAddress(pc_as_address, containing_range) || + pc_as_address != containing_range.GetBaseAddress()) + return; + + // If we got here because of a breakpoint hit, then set the inlined depth + // depending on where the breakpoint was set. If we got here because of a + // crash, then set the inlined depth to the deepest most block. Otherwise, + // we stopped here naturally as the result of a step, so set ourselves in the + // containing frame of the whole set of nested inlines, so the user can then + // "virtually" step into the frames one by one, or next over the whole mess. + // Note: We don't have to handle being somewhere in the middle of the stack + // here, since ResetCurrentInlinedDepth doesn't get called if there is a + // valid inlined depth set. StopInfoSP stop_info_sp = m_thread.GetStopInfo(); if (!stop_info_sp) return; + switch (stop_info_sp->GetStopReason()) { + case eStopReasonWatchpoint: + case eStopReasonException: + case eStopReasonExec: + case eStopReasonFork: + case eStopReasonVFork: + case eStopReasonVForkDone: + case eStopReasonSignal: + // In all these cases we want to stop in the deepest frame. + m_current_inlined_pc = curr_pc; + m_current_inlined_depth = 0; + break; + case eStopReasonBreakpoint: { + // FIXME: Figure out what this break point is doing, and set the inline + // depth appropriately. Be careful to take into account breakpoints that + // implement step over prologue, since that should do the default + // calculation. For now, if the breakpoints corresponding to this hit are + // all internal, I set the stop location to the top of the inlined stack, + // since that will make things like stepping over prologues work right. 
+ // But if there are any non-internal breakpoints I do to the bottom of the + // stack, since that was the old behavior. + uint32_t bp_site_id = stop_info_sp->GetValue(); + BreakpointSiteSP bp_site_sp( + m_thread.GetProcess()->GetBreakpointSiteList().FindByID(bp_site_id)); + bool all_internal = true; + if (bp_site_sp) { + uint32_t num_owners = bp_site_sp->GetNumberOfConstituents(); + for (uint32_t i = 0; i < num_owners; i++) { + Breakpoint &bp_ref = + bp_site_sp->GetConstituentAtIndex(i)->GetBreakpoint(); + if (!bp_ref.IsInternal()) { + all_internal = false; + } + } + } + if (!all_internal) { + m_current_inlined_pc = curr_pc; + m_current_inlined_depth = 0; + break; + } + } + [[fallthrough]]; + default: { + // Otherwise, we should set ourselves at the container of the inlining, so + // that the user can descend into them. So first we check whether we have + // more than one inlined block sharing this PC: + int num_inlined_functions = 0; + + for (Block *container_ptr = block_ptr->GetInlinedParent(); + container_ptr != nullptr; + container_ptr = container_ptr->GetInlinedParent()) { + if (!container_ptr->GetRangeContainingAddress(pc_as_address, + containing_range)) + break; + if (pc_as_address != containing_range.GetBaseAddress()) + break; - bool inlined = true; - auto inline_depth = stop_info_sp->GetSuggestedStackFrameIndex(inlined); - // We're only adjusting the inlined stack here. 
- Log *log = GetLog(LLDBLog::Step); - if (inline_depth) { - m_current_inlined_depth = *inline_depth; - m_current_inlined_pc = m_thread.GetRegisterContext()->GetPC(); - + num_inlined_functions++; + } + m_current_inlined_pc = curr_pc; + m_current_inlined_depth = num_inlined_functions + 1; + Log *log = GetLog(LLDBLog::Step); if (log && log->GetVerbose()) LLDB_LOGF(log, "ResetCurrentInlinedDepth: setting inlined " "depth: %d 0x%" PRIx64 ".\n", - m_current_inlined_depth, m_current_inlined_pc); - } else { - if (log && log->GetVerbose()) - LLDB_LOGF( - log, - "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); + m_current_inlined_depth, curr_pc); + + break; + } } } @@ -727,48 +816,19 @@ void StackFrameList::SelectMostRelevantFrame() { RecognizedStackFrameSP recognized_frame_sp = frame_sp->GetRecognizedFrame(); - if (recognized_frame_sp) { - if (StackFrameSP most_relevant_frame_sp = - recognized_frame_sp->GetMostRelevantFrame()) { - LLDB_LOG(log, "Found most relevant frame at index {0}", - most_relevant_frame_sp->GetFrameIndex()); - SetSelectedFrame(most_relevant_frame_sp.get()); - return; - } - } - LLDB_LOG(log, "Frame #0 not recognized"); - - // If this thread has a non-trivial StopInof, then let it suggest - // a most relevant frame: - StopInfoSP stop_info_sp = m_thread.GetStopInfo(); - uint32_t stack_idx = 0; - bool found_relevant = false; - if (stop_info_sp) { - // Here we're only asking the stop info if it wants to adjust the real stack - // index. We have to ask about the m_inlined_stack_depth in - // Thread::ShouldStop since the plans need to reason with that info. 
- bool inlined = false; - std::optional stack_opt = - stop_info_sp->GetSuggestedStackFrameIndex(inlined); - if (stack_opt) { - stack_idx = *stack_opt; - found_relevant = true; - } + if (!recognized_frame_sp) { + LLDB_LOG(log, "Frame #0 not recognized"); + return; } - frame_sp = GetFrameAtIndex(stack_idx); - if (!frame_sp) - LLDB_LOG(log, "Stop info suggested relevant frame {0} but it didn't exist", - stack_idx); - else if (found_relevant) - LLDB_LOG(log, "Setting selected frame from stop info to {0}", stack_idx); - // Note, we don't have to worry about "inlined" frames here, because we've - // already calculated the inlined frame in Thread::ShouldStop, and - // SetSelectedFrame will take care of that adjustment for us. - SetSelectedFrame(frame_sp.get()); - - if (!found_relevant) + if (StackFrameSP most_relevant_frame_sp = + recognized_frame_sp->GetMostRelevantFrame()) { + LLDB_LOG(log, "Found most relevant frame at index {0}", + most_relevant_frame_sp->GetFrameIndex()); + SetSelectedFrame(most_relevant_frame_sp.get()); + } else { LLDB_LOG(log, "No relevant frame!"); + } } uint32_t StackFrameList::GetSelectedFrameIndex( @@ -781,7 +841,6 @@ uint32_t StackFrameList::GetSelectedFrameIndex( // isn't set, then don't force a selection here, just return 0. 
if (!select_most_relevant) return 0; - // If the inlined stack frame is set, then use that: m_selected_frame_idx = 0; } return *m_selected_frame_idx; diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index f6387d47504e626..60aa65ed38c7494 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -15,7 +15,6 @@ #include "lldb/Breakpoint/WatchpointResource.h" #include "lldb/Core/Debugger.h" #include "lldb/Expression/UserExpression.h" -#include "lldb/Symbol/Block.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" #include "lldb/Target/Target.h" @@ -247,22 +246,6 @@ class StopInfoBreakpoint : public StopInfo { return m_description.c_str(); } - std::optional - GetSuggestedStackFrameIndex(bool inlined_stack) override { - if (!inlined_stack) - return {}; - - ThreadSP thread_sp(m_thread_wp.lock()); - if (!thread_sp) - return {}; - BreakpointSiteSP bp_site_sp( - thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); - if (!bp_site_sp) - return {}; - - return bp_site_sp->GetSuggestedStackFrameIndex(); - } - protected: bool ShouldStop(Event *event_ptr) override { // This just reports the work done by PerformAction or the synchronous @@ -1181,44 +1164,6 @@ class StopInfoTrace : public StopInfo { else return m_description.c_str(); } - - std::optional - GetSuggestedStackFrameIndex(bool inlined_stack) override { - // Trace only knows how to adjust inlined stacks: - if (!inlined_stack) - return {}; - - ThreadSP thread_sp = GetThread(); - StackFrameSP frame_0_sp = thread_sp->GetStackFrameAtIndex(0); - if (!frame_0_sp) - return {}; - if (!frame_0_sp->IsInlined()) - return {}; - Block *block_ptr = frame_0_sp->GetFrameBlock(); - if (!block_ptr) - return {}; - Address pc_address = frame_0_sp->GetFrameCodeAddress(); - AddressRange containing_range; - if (!block_ptr->GetRangeContainingAddress(pc_address, containing_range) || - pc_address != containing_range.GetBaseAddress()) - return {}; - - int 
num_inlined_functions = 0; - - for (Block *container_ptr = block_ptr->GetInlinedParent(); - container_ptr != nullptr; - container_ptr = container_ptr->GetInlinedParent()) { - if (!container_ptr->GetRangeContainingAddress(pc_address, - containing_range)) - break; - if (pc_address != containing_range.GetBaseAddress()) - break; - - num_inlined_functions++; - } - inlined_stack = true; - return num_inlined_functions + 1; - } }; // StopInfoException diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 735295e6f25937a..8373cdc36268f8d 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -619,14 +619,6 @@ void Thread::WillStop() { void Thread::SetupForResume() { if (GetResumeState() != eStateSuspended) { - // First check whether this thread is going to "actually" resume at all. - // For instance, if we're stepping from one level to the next of an - // virtual inlined call stack, we just change the inlined call stack index - // without actually running this thread. In that case, for this thread we - // shouldn't push a step over breakpoint plan or do that work. - if (GetCurrentPlan()->IsVirtualStep()) - return; - // If we're at a breakpoint push the step-over breakpoint plan. Do this // before telling the current plan it will resume, since we might change // what the current plan is. 
diff --git a/lldb/source/Target/ThreadPlanStepInRange.cpp b/lldb/source/Target/ThreadPlanStepInRange.cpp index 325a70619908b6b..567dcc26d0d3727 100644 --- a/lldb/source/Target/ThreadPlanStepInRange.cpp +++ b/lldb/source/Target/ThreadPlanStepInRange.cpp @@ -41,7 +41,7 @@ ThreadPlanStepInRange::ThreadPlanStepInRange( "Step Range stepping in", thread, range, addr_context, stop_others), ThreadPlanShouldStopHere(this), m_step_past_prologue(true), - m_virtual_step(eLazyBoolCalculate), m_step_into_target(step_into_target) { + m_virtual_step(false), m_step_into_target(step_into_target) { SetCallbacks(); SetFlagsToDefault(); SetupAvoidNoDebug(step_in_avoids_code_without_debug_info, @@ -149,7 +149,7 @@ bool ThreadPlanStepInRange::ShouldStop(Event *event_ptr) { m_sub_plan_sp.reset(); } - if (m_virtual_step == eLazyBoolYes) { + if (m_virtual_step) { // If we've just completed a virtual step, all we need to do is check for a // ShouldStopHere plan, and otherwise we're done. // FIXME - This can be both a step in and a step out. Probably should @@ -431,7 +431,7 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool return_value = false; - if (m_virtual_step == eLazyBoolYes) { + if (m_virtual_step) { return_value = true; } else { StopInfoSP stop_info_sp = GetPrivateStopInfo(); @@ -460,13 +460,10 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, bool current_plan) { - m_virtual_step = eLazyBoolCalculate; + m_virtual_step = false; if (resume_state == eStateStepping && current_plan) { Thread &thread = GetThread(); // See if we are about to step over a virtual inlined call. - // But if we already know we're virtual stepping, don't decrement the - // inlined depth again... 
- bool step_without_resume = thread.DecrementCurrentInlinedDepth(); if (step_without_resume) { Log *log = GetLog(LLDBLog::Step); @@ -479,20 +476,11 @@ bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, // FIXME: Maybe it would be better to create a InlineStep stop reason, but // then // the whole rest of the world would have to handle that stop reason. - m_virtual_step = eLazyBoolYes; + m_virtual_step = true; } return !step_without_resume; } return true; } -bool ThreadPlanStepInRange::IsVirtualStep() { - if (m_virtual_step == eLazyBoolCalculate) { - Thread &thread = GetThread(); - if (thread.GetCurrentInlinedDepth() == UINT32_MAX) - m_virtual_step = eLazyBoolNo; - else - m_virtual_step = eLazyBoolYes; - } - return m_virtual_step == eLazyBoolYes; -} +bool ThreadPlanStepInRange::IsVirtualStep() { return m_virtual_step; } diff --git a/lldb/source/Target/ThreadPlanStepOverRange.cpp b/lldb/source/Target/ThreadPlanStepOverRange.cpp index 643ee827c865cb8..ef5b4b5c434d16e 100644 --- a/lldb/source/Target/ThreadPlanStepOverRange.cpp +++ b/lldb/source/Target/ThreadPlanStepOverRange.cpp @@ -402,7 +402,7 @@ bool ThreadPlanStepOverRange::DoWillResume(lldb::StateType resume_state, if (in_inlined_stack) { Log *log = GetLog(LLDBLog::Step); LLDB_LOGF(log, - "ThreadPlanStepOverRange::DoWillResume: adjusting range to " + "ThreadPlanStepInRange::DoWillResume: adjusting range to " "the frame at inlined depth %d.", thread.GetCurrentInlinedDepth()); StackFrameSP stack_sp = thread.GetStackFrameAtIndex(0); diff --git a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py index f52e0f0fd5bcfe0..752c3a9cbd286a8 100644 --- a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py +++ b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py @@ -32,12 +32,6 @@ def test_step_in_template_with_python_api(self): self.build() self.step_in_template() - 
@add_test_categories(["pyapi"]) - def test_virtual_inline_stepping(self): - """Test stepping through a virtual inlined call stack""" - self.build() - self.virtual_inline_stepping() - def setUp(self): # Call super's setUp(). TestBase.setUp(self) @@ -363,60 +357,3 @@ def step_in_template(self): step_sequence = [["// In max_value specialized", "into"]] self.run_step_sequence(step_sequence) - - def run_to_call_site_and_step(self, source_regex, func_name, start_pos): - main_spec = lldb.SBFileSpec("calling.cpp") - # Set the breakpoint by file and line, not sourced regex because - # we want to make sure we can set breakpoints on call sites: - call_site_line_num = line_number(self.main_source, source_regex) - target, process, thread, bkpt = lldbutil.run_to_line_breakpoint( - self, main_spec, call_site_line_num - ) - - # Make sure that the location is at the call site (run_to_line_breakpoint already asserted - # that there's one location.): - bkpt_loc = bkpt.location[0] - strm = lldb.SBStream() - result = bkpt_loc.GetDescription(strm, lldb.eDescriptionLevelFull) - - self.assertTrue(result, "Got a location description") - desc = strm.GetData() - self.assertIn(f"calling.cpp:{call_site_line_num}", desc, "Right line listed") - # We don't get the function name right yet - so we omit it in printing. - # Turn on this test when that is working. - # self.assertIn(func_name, desc, "Right function listed") - - pc = thread.frame[0].pc - for i in range(start_pos, 3): - thread.StepInto() - frame_0 = thread.frame[0] - - trivial_line_num = line_number( - self.main_source, f"In caller_trivial_inline_{i}." 
- ) - self.assertEqual( - frame_0.line_entry.line, - trivial_line_num, - f"Stepped into the caller_trivial_inline_{i}", - ) - if pc != frame_0.pc: - # If we get here, we stepped to the expected line number, but - # the compiler on this system has decided to insert an instruction - # between the call site of an inlined function with no arguments, - # returning void, and its immediate call to another void inlined function - # with no arguments. We aren't going to be testing virtual inline - # stepping for this function... - break - - process.Kill() - target.Clear() - - def virtual_inline_stepping(self): - """Use the Python API's to step through a virtual inlined stack""" - self.run_to_call_site_and_step("At caller_trivial_inline_1", "main", 1) - self.run_to_call_site_and_step( - "In caller_trivial_inline_1", "caller_trivial_inline_1", 2 - ) - self.run_to_call_site_and_step( - "In caller_trivial_inline_2", "caller_trivial_inline_2", 3 - ) diff --git a/lldb/test/API/functionalities/inline-stepping/calling.cpp b/lldb/test/API/functionalities/inline-stepping/calling.cpp index d7ee56b3c079091..49179ce7c97883c 100644 --- a/lldb/test/API/functionalities/inline-stepping/calling.cpp +++ b/lldb/test/API/functionalities/inline-stepping/calling.cpp @@ -13,12 +13,6 @@ int called_by_inline_ref (int &value); inline void inline_trivial_1 () __attribute__((always_inline)); inline void inline_trivial_2 () __attribute__((always_inline)); -// These three should share the same initial pc so we can test -// virtual inline stepping. -inline void caller_trivial_inline_1() __attribute__((always_inline)); -inline void caller_trivial_inline_2() __attribute__((always_inline)); -inline void caller_trivial_inline_3() __attribute__((always_inline)); - void caller_trivial_1 (); void caller_trivial_2 (); @@ -85,23 +79,6 @@ caller_trivial_2 () inline_value += 1; // At increment in caller_trivial_2. 
} -// When you call caller_trivial_inline_1, the inlined call-site -// should share a PC with all three of the following inlined -// functions, so we can exercise "virtual inline stepping". -void caller_trivial_inline_1() { - caller_trivial_inline_2(); // In caller_trivial_inline_1. - inline_value += 1; -} - -void caller_trivial_inline_2() { - caller_trivial_inline_3(); // In caller_trivial_inline_2. - inline_value += 1; -} - -void caller_trivial_inline_3() { - inline_value += 1; // In caller_trivial_inline_3. -} - void called_by_inline_trivial () { @@ -155,7 +132,5 @@ main (int argc, char **argv) max_value(123, 456); // Call max_value template max_value(std::string("abc"), std::string("0022")); // Call max_value specialized - caller_trivial_inline_1(); // At caller_trivial_inline_1. - return 0; // About to return from main. } From 8274be509ed9e07188a8a64d95907a46cbe8e657 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 28 Oct 2024 18:53:38 +0000 Subject: [PATCH 208/425] [AArch64] Remove header dependencies of AArch64ISelLowering.h. NFC This patch aims to reduce the include used by AArch64ISelLowering, allowing it to be included by unittests so that they can reference the AArch64ISD nodes. It: - Moves the inclusion of AArch64SMEAttributes.h to the uses. - Moves LowerPtrAuthGlobalAddressStatically to a static function, so that AArch64PACKey is not required in the header. - Moves the definitions of getExceptionPointerRegister to the cpp file, to remove the reference of AArch64::X0. 
--- llvm/lib/Target/AArch64/AArch64FastISel.cpp | 1 + .../Target/AArch64/AArch64FrameLowering.cpp | 1 + .../Target/AArch64/AArch64ISelLowering.cpp | 21 +++++++++++++++++-- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 17 ++------------- .../AArch64/AArch64SelectionDAGInfo.cpp | 2 ++ .../AArch64/AArch64TargetTransformInfo.cpp | 1 + .../AArch64/GISel/AArch64CallLowering.cpp | 1 + 7 files changed, 27 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index cbf38f2c57a35e9..6c874fcabcc3022 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -19,6 +19,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index dfaa36f7f512d80..9af6429c5caee0d 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -212,6 +212,7 @@ #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4c0cd1ac3d45126..32ba2866ac81807 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -19,6 +19,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" 
#include "llvm/ADT/ArrayRef.h" @@ -10082,9 +10083,9 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, // Thus, it's only used for ptrauth references to extern_weak to avoid null // checks. -SDValue AArch64TargetLowering::LowerPtrAuthGlobalAddressStatically( +static SDValue LowerPtrAuthGlobalAddressStatically( SDValue TGA, SDLoc DL, EVT VT, AArch64PACKey::ID KeyC, - SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG) const { + SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG) { const auto *TGN = cast(TGA.getNode()); assert(TGN->getGlobal()->hasExternalWeakLinkage()); @@ -27574,6 +27575,22 @@ AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { return TargetLowering::getSafeStackPointerLocation(IRB); } +/// If a physical register, this returns the register that receives the +/// exception address on entry to an EH pad. +Register AArch64TargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + // FIXME: This is a guess. Has this been defined yet? + return AArch64::X0; +} + +/// If a physical register, this returns the register that receives the +/// exception typeid on entry to a landing pad. +Register AArch64TargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + // FIXME: This is a guess. Has this been defined yet? 
+ return AArch64::X1; +} + bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial( const Instruction &AndI) const { // Only sink 'and' mask to cmp use block if it is masking a single bit, since diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 160cd18ca53b32c..d696355bb062a89 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -14,8 +14,6 @@ #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H -#include "AArch64.h" -#include "Utils/AArch64SMEAttributes.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -830,18 +828,12 @@ class AArch64TargetLowering : public TargetLowering { /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. Register - getExceptionPointerRegister(const Constant *PersonalityFn) const override { - // FIXME: This is a guess. Has this been defined yet? - return AArch64::X0; - } + getExceptionPointerRegister(const Constant *PersonalityFn) const override; /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. Register - getExceptionSelectorRegister(const Constant *PersonalityFn) const override { - // FIXME: This is a guess. Has this been defined yet? 
- return AArch64::X1; - } + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; bool isIntDivCheap(EVT VT, AttributeList Attr) const override; @@ -1132,11 +1124,6 @@ class AArch64TargetLowering : public TargetLowering { SelectionDAG &DAG) const; SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT, - AArch64PACKey::ID Key, - SDValue Discriminator, - SDValue AddrDiscriminator, - SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 19ef6f4fb32e747..525538db8036c2c 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -11,6 +11,8 @@ //===----------------------------------------------------------------------===// #include "AArch64TargetMachine.h" +#include "Utils/AArch64SMEAttributes.h" + using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ff3c69f7e10c660..71f9bbbbc350415 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -10,6 +10,7 @@ #include "AArch64ExpandImm.h" #include "AArch64PerfectShuffle.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp 
b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 6cbfb018b3183ad..065858c42894471 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -18,6 +18,7 @@ #include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ObjCARCUtil.h" From 66bbbf2e951a486f972a5a54378e6791193ade60 Mon Sep 17 00:00:00 2001 From: Jan Voung Date: Mon, 28 Oct 2024 15:13:29 -0400 Subject: [PATCH 209/425] [clang][dataflow] Cache accessors returning pointers in bugprone-unchecked-optional-access (#113922) Previously, we covered returning refs, or copies of optional, and bools. Now cover returning pointers (to any type). This is useful for cases like operator-> of smart pointers. Addresses more of issue llvm#58510 --- .../Models/UncheckedOptionalAccessModel.h | 8 +++ .../Models/UncheckedOptionalAccessModel.cpp | 20 +++++- .../UncheckedOptionalAccessModelTest.cpp | 72 ++++++++++++++++--- 3 files changed, 87 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h index 9d81cacb507351a..713494178b97bdb 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h +++ b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h @@ -37,6 +37,14 @@ struct UncheckedOptionalAccessModelOptions { /// can't identify when their results are used safely (across calls), /// resulting in false positives in all such cases. Note: this option does not /// cover access through `operator[]`. 
+ /// FIXME: we currently cache and equate the result of const accessors + /// returning pointers, so cover the case of operator-> followed by + /// operator->, which covers the common case of smart pointers. We also cover + /// some limited cases of returning references (if return type is an optional + /// type), so cover some cases of operator* followed by operator*. We don't + /// cover mixing operator-> and operator*. Once we are confident in this const + /// accessor caching, we shouldn't need the IgnoreSmartPointerDereference + /// option anymore. bool IgnoreSmartPointerDereference = false; }; diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp index 31ae2b94f5b6174..da5dda063344f97 100644 --- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp +++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp @@ -338,6 +338,11 @@ auto isZeroParamConstMemberCall() { callee(cxxMethodDecl(parameterCountIs(0), isConst()))); } +auto isZeroParamConstMemberOperatorCall() { + return cxxOperatorCallExpr( + callee(cxxMethodDecl(parameterCountIs(0), isConst()))); +} + auto isNonConstMemberCall() { return cxxMemberCallExpr(callee(cxxMethodDecl(unless(isConst())))); } @@ -572,9 +577,10 @@ void handleConstMemberCall(const CallExpr *CE, return; } - // Cache if the const method returns a boolean type. + // Cache if the const method returns a boolean or pointer type. // We may decide to cache other return types in the future. 
- if (RecordLoc != nullptr && CE->getType()->isBooleanType()) { + if (RecordLoc != nullptr && + (CE->getType()->isBooleanType() || CE->getType()->isPointerType())) { Value *Val = State.Lattice.getOrCreateConstMethodReturnValue(*RecordLoc, CE, State.Env); if (Val == nullptr) @@ -597,6 +603,14 @@ void transferValue_ConstMemberCall(const CXXMemberCallExpr *MCE, MCE, dataflow::getImplicitObjectLocation(*MCE, State.Env), Result, State); } +void transferValue_ConstMemberOperatorCall( + const CXXOperatorCallExpr *OCE, const MatchFinder::MatchResult &Result, + LatticeTransferState &State) { + auto *RecordLoc = cast_or_null( + State.Env.getStorageLocation(*OCE->getArg(0))); + handleConstMemberCall(OCE, RecordLoc, Result, State); +} + void handleNonConstMemberCall(const CallExpr *CE, dataflow::RecordStorageLocation *RecordLoc, const MatchFinder::MatchResult &Result, @@ -1020,6 +1034,8 @@ auto buildTransferMatchSwitch() { // const accessor calls .CaseOfCFGStmt(isZeroParamConstMemberCall(), transferValue_ConstMemberCall) + .CaseOfCFGStmt(isZeroParamConstMemberOperatorCall(), + transferValue_ConstMemberOperatorCall) // non-const member calls that may modify the state of an object. 
.CaseOfCFGStmt(isNonConstMemberCall(), transferValue_NonConstMemberCall) diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp index 5b64eaca0e10d3a..de16f6be8eedbc9 100644 --- a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp @@ -1282,28 +1282,35 @@ static raw_ostream &operator<<(raw_ostream &OS, class UncheckedOptionalAccessTest : public ::testing::TestWithParam { protected: - void ExpectDiagnosticsFor(std::string SourceCode) { - ExpectDiagnosticsFor(SourceCode, ast_matchers::hasName("target")); + void ExpectDiagnosticsFor(std::string SourceCode, + bool IgnoreSmartPointerDereference = true) { + ExpectDiagnosticsFor(SourceCode, ast_matchers::hasName("target"), + IgnoreSmartPointerDereference); } - void ExpectDiagnosticsForLambda(std::string SourceCode) { + void ExpectDiagnosticsForLambda(std::string SourceCode, + bool IgnoreSmartPointerDereference = true) { ExpectDiagnosticsFor( - SourceCode, ast_matchers::hasDeclContext( - ast_matchers::cxxRecordDecl(ast_matchers::isLambda()))); + SourceCode, + ast_matchers::hasDeclContext( + ast_matchers::cxxRecordDecl(ast_matchers::isLambda())), + IgnoreSmartPointerDereference); } template - void ExpectDiagnosticsFor(std::string SourceCode, - FuncDeclMatcher FuncMatcher) { + void ExpectDiagnosticsFor(std::string SourceCode, FuncDeclMatcher FuncMatcher, + bool IgnoreSmartPointerDereference = true) { // Run in C++17 and C++20 mode to cover differences in the AST between modes // (e.g. C++20 can contain `CXXRewrittenBinaryOperator`). 
for (const char *CxxMode : {"-std=c++17", "-std=c++20"}) - ExpectDiagnosticsFor(SourceCode, FuncMatcher, CxxMode); + ExpectDiagnosticsFor(SourceCode, FuncMatcher, CxxMode, + IgnoreSmartPointerDereference); } template void ExpectDiagnosticsFor(std::string SourceCode, FuncDeclMatcher FuncMatcher, - const char *CxxMode) { + const char *CxxMode, + bool IgnoreSmartPointerDereference) { ReplaceAllOccurrences(SourceCode, "$ns", GetParam().NamespaceName); ReplaceAllOccurrences(SourceCode, "$optional", GetParam().TypeName); @@ -1328,8 +1335,7 @@ class UncheckedOptionalAccessTest template T Make(); )"); - UncheckedOptionalAccessModelOptions Options{ - /*IgnoreSmartPointerDereference=*/true}; + UncheckedOptionalAccessModelOptions Options{IgnoreSmartPointerDereference}; std::vector Diagnostics; llvm::Error Error = checkDataflow( AnalysisInputs( @@ -3721,6 +3727,50 @@ TEST_P(UncheckedOptionalAccessTest, ConstByValueAccessorWithModInBetween) { )cc"); } +TEST_P(UncheckedOptionalAccessTest, ConstPointerAccessor) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + $ns::$optional x; + }; + + struct MyUniquePtr { + A* operator->() const; + }; + + void target(MyUniquePtr p) { + if (p->x) { + *p->x; + } + } + )cc", + /*IgnoreSmartPointerDereference=*/false); +} + +TEST_P(UncheckedOptionalAccessTest, ConstPointerAccessorWithModInBetween) { + ExpectDiagnosticsFor(R"cc( + #include "unchecked_optional_access_test.h" + + struct A { + $ns::$optional x; + }; + + struct MyUniquePtr { + A* operator->() const; + void reset(A*); + }; + + void target(MyUniquePtr p) { + if (p->x) { + p.reset(nullptr); + *p->x; // [[unsafe]] + } + } + )cc", + /*IgnoreSmartPointerDereference=*/false); +} + TEST_P(UncheckedOptionalAccessTest, ConstBoolAccessor) { ExpectDiagnosticsFor(R"cc( #include "unchecked_optional_access_test.h" From 0d0abb351b5fcf49ccc46eba8b7f2a1f353a05a6 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 28 Oct 2024 20:14:08 +0100 Subject: [PATCH 
210/425] [VPlan] Use ResumePhi to create reduction resume phis. (#110004) Use VPInstruction::ResumePhi to create phi nodes for reduction resume values in the scalar preheader, similar to how ResumePhis are used for first-order recurrence resume values after 9a5a8731e77. This allows simplifying createAndCollectMergePhiForReduction to only collect reduction resume phis when vectorizing epilogue loops and adding extra incoming edges from the main vector loop. Updating phis for the epilogue vector loops requires special attention, because additional incoming values from the bypass blocks need to be added. PR: https://github.com/llvm/llvm-project/pull/110004 --- .../Transforms/Vectorize/LoopVectorize.cpp | 130 +++++++++--------- .../RISCV/vplan-vp-intrinsics-reduction.ll | 9 ++ ...-order-recurrence-sink-replicate-region.ll | 2 + .../LoopVectorize/vplan-printing.ll | 9 ++ 4 files changed, 88 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 88086f24dfdce2b..778d928252e0519 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7562,67 +7562,62 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) { } } -// Check if \p RedResult is a ComputeReductionResult instruction, and if it is -// create a merge phi node for it. -static void createAndCollectMergePhiForReduction( - VPInstruction *RedResult, - VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock, - bool VectorizingEpilogue) { - if (!RedResult || - RedResult->getOpcode() != VPInstruction::ComputeReductionResult) +// If \p R is a ComputeReductionResult when vectorizing the epilog loop, +// fix the reduction's scalar PHI node by adding the incoming value from the +// main vector loop. 
+static void fixReductionScalarResumeWhenVectorizingEpilog( + VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock) { + auto *EpiRedResult = dyn_cast(R); + if (!EpiRedResult || + EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult) return; - auto *PhiR = cast(RedResult->getOperand(0)); - const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor(); - - Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane())); - auto *ResumePhi = - dyn_cast(PhiR->getStartValue()->getUnderlyingValue()); - if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { - auto *Cmp = cast(PhiR->getStartValue()->getUnderlyingValue()); - assert(Cmp->getPredicate() == CmpInst::ICMP_NE); - assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue()); - ResumePhi = cast(Cmp->getOperand(0)); - } - assert((!VectorizingEpilogue || ResumePhi) && - "when vectorizing the epilogue loop, we need a resume phi from main " - "vector loop"); - - // TODO: bc.merge.rdx should not be created here, instead it should be - // modeled in VPlan. - BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader(); - // Create a phi node that merges control-flow from the backedge-taken check - // block and the middle block. - auto *BCBlockPhi = - PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx", - LoopScalarPreHeader->getTerminator()->getIterator()); - - // If we are fixing reductions in the epilogue loop then we should already - // have created a bc.merge.rdx Phi after the main vector body. Ensure that - // we carry over the incoming values correctly. 
+ auto *EpiRedHeaderPhi = + cast(EpiRedResult->getOperand(0)); + const RecurrenceDescriptor &RdxDesc = + EpiRedHeaderPhi->getRecurrenceDescriptor(); + Value *MainResumeValue = + EpiRedHeaderPhi->getStartValue()->getUnderlyingValue(); + if (RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + auto *Cmp = cast(MainResumeValue); + assert(Cmp->getPredicate() == CmpInst::ICMP_NE && + "AnyOf expected to start with ICMP_NE"); + assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() && + "AnyOf expected to start by comparing main resume value to original " + "start value"); + MainResumeValue = Cmp->getOperand(0); + } + PHINode *MainResumePhi = cast(MainResumeValue); + + // When fixing reductions in the epilogue loop we should already have + // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry + // over the incoming values correctly. + using namespace VPlanPatternMatch; + auto IsResumePhi = [](VPUser *U) { + return match( + U, m_VPInstruction(m_VPValue(), m_VPValue())); + }; + assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 && + "ResumePhi must have a single user"); + auto *EpiResumePhiVPI = + cast(*find_if(EpiRedResult->users(), IsResumePhi)); + auto *EpiResumePhi = cast(State.get(EpiResumePhiVPI, true)); + BasicBlock *LoopScalarPreHeader = EpiResumePhi->getParent(); + bool Updated = false; for (auto *Incoming : predecessors(LoopScalarPreHeader)) { - if (Incoming == LoopMiddleBlock) - BCBlockPhi->addIncoming(FinalValue, Incoming); - else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming)) - BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming), - Incoming); - else - BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming); + if (is_contained(MainResumePhi->blocks(), Incoming)) { + assert(EpiResumePhi->getIncomingValueForBlock(Incoming) == + RdxDesc.getRecurrenceStartValue() && + "Trying to reset unexpected value"); + assert(!Updated && "Should update at most 1 
incoming value"); + EpiResumePhi->setIncomingValueForBlock( + Incoming, MainResumePhi->getIncomingValueForBlock(Incoming)); + Updated = true; + } } - - auto *OrigPhi = cast(PhiR->getUnderlyingValue()); - // TODO: This fixup should instead be modeled in VPlan. - // Fix the scalar loop reduction variable with the incoming reduction sum - // from the vector body and from the backedge value. - int IncomingEdgeBlockIdx = - OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch()); - assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); - // Pick the other block. - int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); - OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); - Instruction *LoopExitInst = RdxDesc.getLoopExitInstr(); - OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst); + assert(Updated && "Must update EpiResumePhi."); + (void)Updated; } DenseMap LoopVectorizationPlanner::executePlan( @@ -7713,11 +7708,11 @@ DenseMap LoopVectorizationPlanner::executePlan( // 2.5 Collect reduction resume values. auto *ExitVPBB = cast(BestVPlan.getVectorLoopRegion()->getSingleSuccessor()); - for (VPRecipeBase &R : *ExitVPBB) { - createAndCollectMergePhiForReduction( - dyn_cast(&R), State, OrigLoop, - State.CFG.VPBB2IRBB[ExitVPBB], VectorizingEpilogue); - } + if (VectorizingEpilogue) + for (VPRecipeBase &R : *ExitVPBB) { + fixReductionScalarResumeWhenVectorizingEpilog( + &R, State, State.CFG.VPBB2IRBB[ExitVPBB]); + } // 2.6. Maintain Loop Hints // Keep all loop hints from the original loop on the vector loop (we'll @@ -9518,6 +9513,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( }); FinalReductionResult->insertBefore(*MiddleVPBB, IP); + // Order is strict: if there are multiple successors, the first is the exit + // block, second is the scalar preheader. 
+ VPBasicBlock *ScalarPHVPBB = + cast(MiddleVPBB->getSuccessors().back()); + VPBuilder ScalarPHBuilder(ScalarPHVPBB); + auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( + VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()}, + {}, "bc.merge.rdx"); + auto *RedPhi = cast(PhiR->getUnderlyingInstr()); + Plan->addLiveOut(RedPhi, ResumePhiRecipe); + // Adjust AnyOf reductions; replace the reduction phi for the selected value // with a boolean reduction phi node to check if the condition is true in // any iteration. The final value is selected by the final diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll index 1326751a847d7d5..59db6c197ef8ca2 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll @@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: No successors ; IF-EVL-INLOOP-EMPTY: ; IF-EVL-INLOOP-NEXT: scalar.ph: +; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; IF-EVL-INLOOP-NEXT: No successors +; IF-EVL-INLOOP-EMPTY: +; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> ; IF-EVL-INLOOP-NEXT: } ; @@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-OUTLOOP-NEXT: No successors ; NO-VP-OUTLOOP-EMPTY: ; NO-VP-OUTLOOP-NEXT: scalar.ph: +; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; NO-VP-OUTLOOP-NEXT: No successors +; NO-VP-OUTLOOP-EMPTY: +; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> ; NO-VP-OUTLOOP-NEXT: } ; @@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) { ; NO-VP-INLOOP-NEXT: No successors ; NO-VP-INLOOP-EMPTY: ; NO-VP-INLOOP-NEXT: scalar.ph: +; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start> ; 
NO-VP-INLOOP-NEXT: No successors +; NO-VP-INLOOP-EMPTY: +; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]> ; NO-VP-INLOOP-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 8e56614a2e3d5c7..b05980bef1b38f2 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph ; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> +; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]> ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 0dde507d08be747..2247295295663e1 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]> ; CHECK-NEXT: } ; entry: @@ -221,7 +224,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: 
Live-out float %red = vp<[[RED_RESUME]]> ; CHECK-NEXT: } ; entry: @@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]> ; CHECK-NEXT:} entry: From ad5b9441f949716570e89fcb27b76e9bfb4b7f70 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Mon, 28 Oct 2024 12:25:12 -0700 Subject: [PATCH 211/425] [rtsan][asan] NFC Fix hyperlink to CMake doc (#113931) --- clang/docs/AddressSanitizer.rst | 2 +- clang/docs/RealtimeSanitizer.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/AddressSanitizer.rst b/clang/docs/AddressSanitizer.rst index 76fdf559950599c..d937cbfdf583c4b 100644 --- a/clang/docs/AddressSanitizer.rst +++ b/clang/docs/AddressSanitizer.rst @@ -26,7 +26,7 @@ Typical slowdown introduced by AddressSanitizer is **2x**. How to build ============ -Build LLVM/Clang with `CMake ` and enable +Build LLVM/Clang with `CMake `_ and enable the ``compiler-rt`` runtime. An example CMake configuration that will allow for the use/testing of AddressSanitizer: diff --git a/clang/docs/RealtimeSanitizer.rst b/clang/docs/RealtimeSanitizer.rst index b09162cd99f450d..41b8bbb33baf144 100644 --- a/clang/docs/RealtimeSanitizer.rst +++ b/clang/docs/RealtimeSanitizer.rst @@ -21,7 +21,7 @@ The runtime slowdown introduced by RealtimeSanitizer is negligible. How to build ============ -Build LLVM/Clang with `CMake ` and enable the +Build LLVM/Clang with `CMake `_ and enable the ``compiler-rt`` runtime. 
An example CMake configuration that will allow for the use/testing of RealtimeSanitizer: From 19131c7f36e047898ea954ee5a187ac62f2ab09b Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Mon, 28 Oct 2024 12:49:28 -0700 Subject: [PATCH 212/425] [clang][modules][lldb] Fix build after #113391 Instead of changing the return type of `ModuleMap::findOrCreateModule`, this patch adds a counterpart that only returns `Module *` and thus has the same signature as `createModule()`, which is important in `ASTReader`. --- clang/include/clang/Lex/ModuleMap.h | 14 +++++++++++--- clang/lib/Lex/ModuleMap.cpp | 23 +++++++++++++---------- clang/lib/Serialization/ASTReader.cpp | 5 +++-- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index 5ee152e4213abf0..53e9e0ec83ddb1f 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -541,9 +541,17 @@ class ModuleMap { /// /// \param IsExplicit Whether this is an explicit submodule. /// - /// \returns The found or newly-created module. - Module *findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework, - bool IsExplicit); + /// \returns The found or newly-created module, along with a boolean value + /// that will be true if the module is newly-created. + std::pair findOrCreateModule(StringRef Name, Module *Parent, + bool IsFramework, + bool IsExplicit); + /// Call \c ModuleMap::findOrCreateModule and throw away the information + /// whether the module was found or created. + Module *findOrCreateModuleFirst(StringRef Name, Module *Parent, + bool IsFramework, bool IsExplicit) { + return findOrCreateModule(Name, Parent, IsFramework, IsExplicit).first; + } /// Create new submodule, assuming it does not exist. This function can only /// be called when it is guaranteed that this submodule does not exist yet. /// The parameters have same semantics as \c ModuleMap::findOrCreateModule. 
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 10774429a2177b7..dc9d2bfd5629c95 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -655,8 +655,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(SkippedDir.getName()), NameBuf); - Result = - findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit); + Result = findOrCreateModuleFirst(Name, Result, /*IsFramework=*/false, + Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); // Associate the module and the directory. @@ -672,8 +672,8 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) { SmallString<32> NameBuf; StringRef Name = sanitizeFilenameAsIdentifier( llvm::sys::path::stem(File.getName()), NameBuf); - Result = - findOrCreateModule(Name, Result, /*IsFramework=*/false, Explicit); + Result = findOrCreateModuleFirst(Name, Result, /*IsFramework=*/false, + Explicit); setInferredModuleAllowedBy(Result, UmbrellaModuleMap); Result->addTopHeader(File); @@ -857,14 +857,17 @@ Module *ModuleMap::lookupModuleQualified(StringRef Name, Module *Context) const{ return Context->findSubmodule(Name); } -Module *ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, - bool IsFramework, bool IsExplicit) { +std::pair ModuleMap::findOrCreateModule(StringRef Name, + Module *Parent, + bool IsFramework, + bool IsExplicit) { // Try to find an existing module with this name. if (Module *Sub = lookupModuleQualified(Name, Parent)) - return Sub; + return std::make_pair(Sub, false); // Create a new module with this name. 
- return createModule(Name, Parent, IsFramework, IsExplicit); + Module *M = createModule(Name, Parent, IsFramework, IsExplicit); + return std::make_pair(M, true); } Module *ModuleMap::createModule(StringRef Name, Module *Parent, @@ -2129,8 +2132,8 @@ void ModuleMapParser::parseModuleDecl() { ActiveModule = Map.createShadowedModule(ModuleName, Framework, ShadowingModule); } else { - ActiveModule = - Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit); + ActiveModule = Map.findOrCreateModuleFirst(ModuleName, ActiveModule, + Framework, Explicit); } ActiveModule->DefinitionLoc = ModuleNameLoc; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 74a79ac54bb4eb2..8d8f9378cfeabee 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -5760,8 +5760,9 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F, // If we don't know the top-level module, there's no point in doing qualified // lookup of its submodules; it won't find anything anywhere within this tree. // Let's skip that and avoid some string lookups. - auto CreateModule = !KnowsTopLevelModule ? &ModuleMap::createModule - : &ModuleMap::findOrCreateModule; + auto CreateModule = !KnowsTopLevelModule + ? 
&ModuleMap::createModule + : &ModuleMap::findOrCreateModuleFirst; bool First = true; Module *CurrentModule = nullptr; From 67bcce21415c7f687c28eb727c40b27924335f5a Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 29 Oct 2024 03:14:42 +0700 Subject: [PATCH 213/425] [CFI][LowerTypeTests] Fix indirect call with alias (#106185) Motivation example: ``` > cat test.cpp extern "C" [[gnu::weak]] void f() {} void alias() __attribute__((alias("f"))); int main() { auto p = alias; p(); } > clang test.cpp -fsanitize=cfi-icall -flto=thin -fuse-ld=lld > ./a.out [1] 1868 illegal hardware instruction ./a.out ``` If the address of a function was only taken through its alias, the function was not considered exported and therefore was not included in the CFI jumptable. This resulted in `@llvm.type.test()` being lowered to `false`, and consequently the indirect call to the function was eventually optimized to `ubsantrap()`. --- llvm/include/llvm/IR/ModuleSummaryIndexYAML.h | 102 +++++++++++++----- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 6 +- .../LowerTypeTests/cfi-icall-alias.ll | 54 ++++++++++ 3 files changed, 133 insertions(+), 29 deletions(-) create mode 100644 llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h index 902d1305c818acf..d12bc260f5cf4e2 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -135,10 +135,14 @@ template <> struct MappingTraits { } }; -struct FunctionSummaryYaml { +struct GlobalValueSummaryYaml { + // Commonly used fields unsigned Linkage, Visibility; bool NotEligibleToImport, Live, IsLocal, CanAutoHide; unsigned ImportType; + // Fields for AliasSummary + std::optional Aliasee; + // Fields for FunctionSummary std::vector Refs; std::vector TypeTests; std::vector TypeTestAssumeVCalls, @@ -176,8 +180,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummary::ConstVCall) namespace llvm { 
namespace yaml { -template <> struct MappingTraits { - static void mapping(IO &io, FunctionSummaryYaml& summary) { +template <> struct MappingTraits { + static void mapping(IO &io, GlobalValueSummaryYaml &summary) { io.mapOptional("Linkage", summary.Linkage); io.mapOptional("Visibility", summary.Visibility); io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport); @@ -185,6 +189,7 @@ template <> struct MappingTraits { io.mapOptional("Local", summary.IsLocal); io.mapOptional("CanAutoHide", summary.CanAutoHide); io.mapOptional("ImportType", summary.ImportType); + io.mapOptional("Aliasee", summary.Aliasee); io.mapOptional("Refs", summary.Refs); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); @@ -199,7 +204,7 @@ template <> struct MappingTraits { } // End yaml namespace } // End llvm namespace -LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml) +LLVM_YAML_IS_SEQUENCE_VECTOR(GlobalValueSummaryYaml) namespace llvm { namespace yaml { @@ -207,61 +212,99 @@ namespace yaml { // FIXME: Add YAML mappings for the rest of the module summary. 
template <> struct CustomMappingTraits { static void inputOne(IO &io, StringRef Key, GlobalValueSummaryMapTy &V) { - std::vector FSums; - io.mapRequired(Key.str().c_str(), FSums); + std::vector GVSums; + io.mapRequired(Key.str().c_str(), GVSums); uint64_t KeyInt; if (Key.getAsInteger(0, KeyInt)) { io.setError("key not an integer"); return; } auto &Elem = V.try_emplace(KeyInt, /*IsAnalysis=*/false).first->second; - for (auto &FSum : FSums) { + for (auto &GVSum : GVSums) { + GlobalValueSummary::GVFlags GVFlags( + static_cast(GVSum.Linkage), + static_cast(GVSum.Visibility), + GVSum.NotEligibleToImport, GVSum.Live, GVSum.IsLocal, + GVSum.CanAutoHide, + static_cast(GVSum.ImportType)); + if (GVSum.Aliasee) { + auto ASum = std::make_unique(GVFlags); + if (!V.count(*GVSum.Aliasee)) + V.emplace(*GVSum.Aliasee, /*IsAnalysis=*/false); + ValueInfo AliaseeVI(/*IsAnalysis=*/false, &*V.find(*GVSum.Aliasee)); + // Note: Aliasee cannot be filled until all summaries are loaded. + // This is done in fixAliaseeLinks() which is called in + // MappingTraits::mapping(). 
+ ASum->setAliasee(AliaseeVI, /*Aliasee=*/nullptr); + Elem.SummaryList.push_back(std::move(ASum)); + continue; + } SmallVector Refs; - Refs.reserve(FSum.Refs.size()); - for (auto &RefGUID : FSum.Refs) { + Refs.reserve(GVSum.Refs.size()); + for (auto &RefGUID : GVSum.Refs) { auto It = V.try_emplace(RefGUID, /*IsAnalysis=*/false).first; Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*It)); } Elem.SummaryList.push_back(std::make_unique( - GlobalValueSummary::GVFlags( - static_cast(FSum.Linkage), - static_cast(FSum.Visibility), - FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, - FSum.CanAutoHide, - static_cast(FSum.ImportType)), - /*NumInsts=*/0, FunctionSummary::FFlags{}, std::move(Refs), - SmallVector{}, std::move(FSum.TypeTests), - std::move(FSum.TypeTestAssumeVCalls), - std::move(FSum.TypeCheckedLoadVCalls), - std::move(FSum.TypeTestAssumeConstVCalls), - std::move(FSum.TypeCheckedLoadConstVCalls), + GVFlags, /*NumInsts=*/0, FunctionSummary::FFlags{}, std::move(Refs), + SmallVector{}, std::move(GVSum.TypeTests), + std::move(GVSum.TypeTestAssumeVCalls), + std::move(GVSum.TypeCheckedLoadVCalls), + std::move(GVSum.TypeTestAssumeConstVCalls), + std::move(GVSum.TypeCheckedLoadConstVCalls), ArrayRef{}, ArrayRef{}, ArrayRef{})); } } static void output(IO &io, GlobalValueSummaryMapTy &V) { for (auto &P : V) { - std::vector FSums; + std::vector GVSums; for (auto &Sum : P.second.SummaryList) { if (auto *FSum = dyn_cast(Sum.get())) { std::vector Refs; Refs.reserve(FSum->refs().size()); for (auto &VI : FSum->refs()) Refs.push_back(VI.getGUID()); - FSums.push_back(FunctionSummaryYaml{ + GVSums.push_back(GlobalValueSummaryYaml{ FSum->flags().Linkage, FSum->flags().Visibility, static_cast(FSum->flags().NotEligibleToImport), static_cast(FSum->flags().Live), static_cast(FSum->flags().DSOLocal), static_cast(FSum->flags().CanAutoHide), - FSum->flags().ImportType, Refs, FSum->type_tests(), - FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(), + 
FSum->flags().ImportType, /*Aliasee=*/std::nullopt, Refs, + FSum->type_tests(), FSum->type_test_assume_vcalls(), + FSum->type_checked_load_vcalls(), FSum->type_test_assume_const_vcalls(), FSum->type_checked_load_const_vcalls()}); - } + } else if (auto *ASum = dyn_cast(Sum.get()); + ASum && ASum->hasAliasee()) { + GVSums.push_back(GlobalValueSummaryYaml{ + ASum->flags().Linkage, ASum->flags().Visibility, + static_cast(ASum->flags().NotEligibleToImport), + static_cast(ASum->flags().Live), + static_cast(ASum->flags().DSOLocal), + static_cast(ASum->flags().CanAutoHide), + ASum->flags().ImportType, + /*Aliasee=*/ASum->getAliaseeGUID()}); + } + } + if (!GVSums.empty()) + io.mapRequired(llvm::utostr(P.first).c_str(), GVSums); + } + } + static void fixAliaseeLinks(GlobalValueSummaryMapTy &V) { + for (auto &P : V) { + for (auto &Sum : P.second.SummaryList) { + if (auto *Alias = dyn_cast(Sum.get())) { + ValueInfo AliaseeVI = Alias->getAliaseeVI(); + auto AliaseeSL = AliaseeVI.getSummaryList(); + if (AliaseeSL.empty()) { + ValueInfo EmptyVI; + Alias->setAliasee(EmptyVI, nullptr); + } else + Alias->setAliasee(AliaseeVI, AliaseeSL[0].get()); + } } - if (!FSums.empty()) - io.mapRequired(llvm::utostr(P.first).c_str(), FSums); } } }; @@ -281,6 +324,9 @@ template <> struct CustomMappingTraits { template <> struct MappingTraits { static void mapping(IO &io, ModuleSummaryIndex& index) { io.mapOptional("GlobalValueMap", index.GlobalValueMap); + if (!io.outputting()) + CustomMappingTraits::fixAliaseeLinks( + index.GlobalValueMap); io.mapOptional("TypeIdMap", index.TypeIdMap); io.mapOptional("WithGlobalValueDeadStripping", index.WithGlobalValueDeadStripping); diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 3fcfc6a876776d9..6ba371069bb2302 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -2083,8 +2083,12 @@ bool LowerTypeTestsModule::lower() { for (auto &I : 
*ExportSummary) for (auto &GVS : I.second.SummaryList) if (GVS->isLive()) - for (const auto &Ref : GVS->refs()) + for (const auto &Ref : GVS->refs()) { AddressTaken.insert(Ref.getGUID()); + for (auto &RefGVS : Ref.getSummaryList()) + if (auto Alias = dyn_cast(RefGVS.get())) + AddressTaken.insert(Alias->getAliaseeGUID()); + } NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); if (CfiFunctionsMD) { diff --git a/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll b/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll new file mode 100644 index 000000000000000..0c5324ee96c9391 --- /dev/null +++ b/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll @@ -0,0 +1,54 @@ +;; Check that if the address of a weak function is only taken through an alias, +;; it is still added to a list of exported functions and @llvm.type.test() is +;; lowered to an actual check against the generated CFI jumptable. + +RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir +RUN: opt test.ll --thinlto-bc --thinlto-split-lto-unit -o test.bc +RUN: llvm-modextract test.bc -n 0 -o test0.bc +RUN: llvm-modextract test.bc -n 1 -o test1.bc + +;; Check that a CFI jumptable is generated. +RUN: opt test1.bc -passes=lowertypetests -lowertypetests-read-summary=in.yaml \ +RUN: -lowertypetests-summary-action=export -lowertypetests-write-summary=exported.yaml \ +RUN: -S -o - | FileCheck %s --check-prefix=REGULAR +REGULAR: @__typeid__ZTSFvvE_global_addr = hidden alias i8, ptr @.cfi.jumptable +REGULAR: @f = alias void (), ptr @.cfi.jumptable +REGULAR: define private void @.cfi.jumptable() + +;; CHECK that @llvm.type.test() is lowered to an actual check. 
+RUN: opt test0.bc -passes=lowertypetests -lowertypetests-read-summary=exported.yaml \ +RUN: -lowertypetests-summary-action=import -S -o - | FileCheck %s --check-prefix=THIN +THIN: define i1 @test() { +THIN-NEXT: %1 = icmp eq i64 ptrtoint (ptr @alias to i64), ptrtoint (ptr @__typeid__ZTSFvvE_global_addr to i64) +THIN-NEXT: ret i1 %1 +THIN-NEXT: } + +;--- test.ll +target triple = "x86_64-pc-linux-gnu" + +@alias = alias void(), ptr @f + +define weak void @f() !type !0 { + ret void +} + +define i1 @test() { + %1 = call i1 @llvm.type.test(ptr nonnull @alias, metadata !"_ZTSFvvE") + ret i1 %1 +} + +declare i1 @llvm.type.test(ptr, metadata) + +!0 = !{i64 0, !"_ZTSFvvE"} +;--- in.yaml +--- +GlobalValueMap: + 8346051122425466633: # guid("test") + - Live: true + Refs: [5833419078793185394] # guid("alias") + TypeTests: [9080559750644022485] # guid("_ZTSFvvE") + 5833419078793185394: # guid("alias") + - Aliasee: 14740650423002898831 # guid("f") + 14740650423002898831: # guid("f") + - +... From 7bd8a165f95123e390f9cbb0a6a5e60d835a4461 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Oct 2024 13:20:46 -0700 Subject: [PATCH 214/425] [X86] Don't allow '+f' as an inline asm constraint. (#113871) f cannot be used as an output constraint. We already errored for '=f' but not '+f'. Fixes #113692. --- clang/lib/Basic/Targets/X86.cpp | 2 +- clang/test/Sema/asm.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index d067ec218b52708..700c2f9a5dbd18d 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1465,7 +1465,7 @@ bool X86TargetInfo::validateAsmConstraint( } case 'f': // Any x87 floating point stack register. // Constraint 'f' cannot be used for output operands. 
- if (Info.ConstraintStr[0] == '=') + if (Info.ConstraintStr[0] == '=' || Info.ConstraintStr[0] == '+') return false; Info.setAllowsRegister(); return true; diff --git a/clang/test/Sema/asm.c b/clang/test/Sema/asm.c index 6cd95c71604d443..28ef3ec6ce09c2a 100644 --- a/clang/test/Sema/asm.c +++ b/clang/test/Sema/asm.c @@ -204,6 +204,12 @@ double f_output_constraint(void) { return result; } +double f_output_constraint_2(void) { + double result; + __asm("foo1": "+f" (result)); // expected-error {{invalid output constraint '+f' in asm}} + return result; +} + void fn1(void) { int l; __asm__("" From 481bce018ea8872277f79102842eaf8a55f634a2 Mon Sep 17 00:00:00 2001 From: joaosaffran <126493771+joaosaffran@users.noreply.github.com> Date: Mon, 28 Oct 2024 13:26:59 -0700 Subject: [PATCH 215/425] Adding splitdouble HLSL function (#109331) - Adding hlsl `splitdouble` intrinsics - Adding DXIL lowering - Adding SPIRV lowering - Adding test Fixes: #108901 --------- Co-authored-by: Joao Saffran --- clang/include/clang/Basic/Builtins.td | 6 ++ clang/lib/CodeGen/CGBuiltin.cpp | 82 +++++++++++++++++ clang/lib/CodeGen/CGCall.cpp | 14 +-- clang/lib/CodeGen/CGExpr.cpp | 13 ++- clang/lib/CodeGen/CodeGenFunction.h | 10 +- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 18 ++++ clang/lib/Sema/SemaHLSL.cpp | 74 +++++++++++---- .../CodeGenHLSL/builtins/splitdouble.hlsl | 91 +++++++++++++++++++ .../test/SemaHLSL/BuiltIns/asuint-errors.hlsl | 30 ++++++ .../SemaHLSL/BuiltIns/splitdouble-errors.hlsl | 76 ++++++++++++++++ llvm/lib/Target/DirectX/DXIL.td | 10 ++ llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 13 +++ llvm/lib/Target/DirectX/DXILOpBuilder.h | 4 + llvm/lib/Target/DirectX/DXILOpLowering.cpp | 55 +++++++++++ llvm/test/CodeGen/DirectX/split-double.ll | 45 --------- llvm/test/CodeGen/DirectX/splitdouble.ll | 76 ++++++++++++++++ .../SPIRV/hlsl-intrinsics/splitdouble.ll | 40 ++++++++ 17 files changed, 581 insertions(+), 76 deletions(-) create mode 100644 
clang/test/CodeGenHLSL/builtins/splitdouble.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl delete mode 100644 llvm/test/CodeGen/DirectX/split-double.ll create mode 100644 llvm/test/CodeGen/DirectX/splitdouble.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 90475a361bb8f86..9bd67e0cefebc32 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4871,6 +4871,12 @@ def HLSLRadians : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_elementwise_splitdouble"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void(...)"; +} + // Builtins for XRay. def XRayCustomEvent : Builtin { let Spellings = ["__xray_customevent"]; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a57c95d5b966721..65d7f5c54a1913e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17,6 +17,7 @@ #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" +#include "CGValue.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -25,8 +26,10 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" #include "clang/AST/OSLog.h" #include "clang/AST/OperationKinds.h" +#include "clang/AST/Type.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" @@ -67,6 +70,7 @@ #include "llvm/TargetParser/X86TargetParser.h" #include #include +#include using namespace clang; using namespace CodeGen; @@ -95,6 +99,76 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, I->addAnnotationMetadata("auto-init"); } +static Value 
*handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) { + Value *Op0 = CGF->EmitScalarExpr(E->getArg(0)); + const auto *OutArg1 = dyn_cast(E->getArg(1)); + const auto *OutArg2 = dyn_cast(E->getArg(2)); + + CallArgList Args; + LValue Op1TmpLValue = + CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType()); + LValue Op2TmpLValue = + CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType()); + + if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee()) + Args.reverseWritebacks(); + + Value *LowBits = nullptr; + Value *HighBits = nullptr; + + if (CGF->CGM.getTarget().getTriple().isDXIL()) { + + llvm::Type *RetElementTy = CGF->Int32Ty; + if (auto *Op0VecTy = E->getArg(0)->getType()->getAs()) + RetElementTy = llvm::VectorType::get( + CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements())); + auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy); + + CallInst *CI = CGF->Builder.CreateIntrinsic( + RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble"); + + LowBits = CGF->Builder.CreateExtractValue(CI, 0); + HighBits = CGF->Builder.CreateExtractValue(CI, 1); + + } else { + // For Non DXIL targets we generate the instructions. 
+ + if (!Op0->getType()->isVectorTy()) { + FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2); + Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy); + + LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0); + HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1); + } else { + int NumElements = 1; + if (const auto *VecTy = + E->getArg(0)->getType()->getAs()) + NumElements = VecTy->getNumElements(); + + FixedVectorType *Uint32VecTy = + FixedVectorType::get(CGF->Int32Ty, NumElements * 2); + Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy); + if (NumElements == 1) { + LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0); + HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1); + } else { + SmallVector EvenMask, OddMask; + for (int I = 0, E = NumElements; I != E; ++I) { + EvenMask.push_back(I * 2); + OddMask.push_back(I * 2 + 1); + } + LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask); + HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask); + } + } + } + CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress()); + auto *LastInst = + CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress()); + CGF->EmitWritebacks(Args); + return LastInst; +} + /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". 
llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, @@ -18959,6 +19033,14 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: { CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.radians"); } + case Builtin::BI__builtin_hlsl_elementwise_splitdouble: { + + assert((E->getArg(0)->getType()->hasFloatingRepresentation() && + E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() && + E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) && + "asuint operands types mismatch"); + return handleHlslSplitdouble(E, this); + } } return nullptr; } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 64e60f0616d77b7..8f4f5d3ed816012 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" +#include "llvm/Support/Path.h" #include "llvm/Transforms/Utils/Local.h" #include using namespace clang; @@ -4243,12 +4244,6 @@ static void emitWriteback(CodeGenFunction &CGF, CGF.EmitBlock(contBB); } -static void emitWritebacks(CodeGenFunction &CGF, - const CallArgList &args) { - for (const auto &I : args.writebacks()) - emitWriteback(CGF, I); -} - static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF, const CallArgList &CallArgs) { ArrayRef Cleanups = @@ -4717,6 +4712,11 @@ void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const { IsUsed = true; } +void CodeGenFunction::EmitWritebacks(const CallArgList &args) { + for (const auto &I : args.writebacks()) + emitWriteback(*this, I); +} + void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, QualType type) { DisableDebugLocationUpdates Dis(*this, E); @@ -5940,7 +5940,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Emit any call-associated writebacks immediately. Arguably this // should happen after any return-value munging. 
if (CallArgs.hasWritebacks()) - emitWritebacks(*this, CallArgs); + EmitWritebacks(CallArgs); // The stack cleanup for inalloca arguments has to run out of the normal // lexical order, so deactivate it and run it manually here. diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index e0ea65bcaf36372..e90e8da3e9f1ea1 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5460,9 +5460,8 @@ LValue CodeGenFunction::EmitOpaqueValueLValue(const OpaqueValueExpr *e) { return getOrCreateOpaqueLValueMapping(e); } -void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, - CallArgList &Args, QualType Ty) { - +std::pair +CodeGenFunction::EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, QualType Ty) { // Emitting the casted temporary through an opaque value. LValue BaseLV = EmitLValue(E->getArgLValue()); OpaqueValueMappingData::bind(*this, E->getOpaqueArgLValue(), BaseLV); @@ -5476,6 +5475,13 @@ void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, TempLV); OpaqueValueMappingData::bind(*this, E->getCastedTemporary(), TempLV); + return std::make_pair(BaseLV, TempLV); +} + +LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, + CallArgList &Args, QualType Ty) { + + auto [BaseLV, TempLV] = EmitHLSLOutArgLValues(E, Ty); llvm::Value *Addr = TempLV.getAddress().getBasePointer(); llvm::Type *ElTy = ConvertTypeForMem(TempLV.getType()); @@ -5488,6 +5494,7 @@ void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast(), LifetimeSize); Args.add(RValue::get(TmpAddr, *this), Ty); + return TempLV; } LValue diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 750a6cc24badca9..3ff4458fb320243 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4296,8 +4296,11 @@ class CodeGenFunction : public CodeGenTypeCache { LValue EmitMaterializeTemporaryExpr(const 
MaterializeTemporaryExpr *E); LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e); LValue EmitHLSLArrayAssignLValue(const BinaryOperator *E); - void EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, - QualType Ty); + + std::pair EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, + QualType Ty); + LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args, + QualType Ty); Address EmitExtVectorElementLValue(LValue V); @@ -5147,6 +5150,9 @@ class CodeGenFunction : public CodeGenTypeCache { SourceLocation ArgLoc, AbstractCallee AC, unsigned ParmNum); + /// EmitWritebacks - Emit the writebacks recorded in \p Args. + void EmitWritebacks(const CallArgList &Args); + /// EmitCallArg - Emit a single call argument. void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType); diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 30dce60b3ff7029..8ade4b27f360fbc 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -438,6 +438,24 @@ template constexpr uint asuint(T F) { return __detail::bit_cast(F); } +//===----------------------------------------------------------------------===// +// asuint splitdouble builtins +//===----------------------------------------------------------------------===// + +/// \fn void asuint(double D, out uint lowbits, out uint highbits) +/// \brief Splits double D and reinterprets its low and high bits as uints. +/// \param D The input double. +/// \param lowbits The output lowbits of D. +/// \param highbits The output highbits of D. 
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) +void asuint(double, out uint, out uint); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) +void asuint(double2, out uint2, out uint2); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) +void asuint(double3, out uint3, out uint3); +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble) +void asuint(double4, out uint4, out uint4); + //===----------------------------------------------------------------------===// // atan builtins //===----------------------------------------------------------------------===// diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 1f6c5b8d4561bcd..a472538236e2d91 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -1698,18 +1698,27 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) { return true; } -static bool CheckArgsTypesAreCorrect( +bool CheckArgTypeIsCorrect( + Sema *S, Expr *Arg, QualType ExpectedType, + llvm::function_ref Check) { + QualType PassedType = Arg->getType(); + if (Check(PassedType)) { + if (auto *VecTyA = PassedType->getAs()) + ExpectedType = S->Context.getVectorType( + ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind()); + S->Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible) + << PassedType << ExpectedType << 1 << 0 << 0; + return true; + } + return false; +} + +bool CheckAllArgTypesAreCorrect( Sema *S, CallExpr *TheCall, QualType ExpectedType, llvm::function_ref Check) { for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) { - QualType PassedType = TheCall->getArg(i)->getType(); - if (Check(PassedType)) { - if (auto *VecTyA = PassedType->getAs()) - ExpectedType = S->Context.getVectorType( - ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind()); - S->Diag(TheCall->getArg(0)->getBeginLoc(), - diag::err_typecheck_convert_incompatible) - << PassedType << ExpectedType << 1 << 0 << 0; + Expr *Arg = TheCall->getArg(i); + if 
(CheckArgTypeIsCorrect(S, Arg, ExpectedType, Check)) { return true; } } @@ -1720,8 +1729,8 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllFloatTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasFloatingRepresentation(); }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy, - checkAllFloatTypes); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy, + checkAllFloatTypes); } static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) { @@ -1732,8 +1741,19 @@ static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) { : PassedType; return !BaseType->isHalfType() && !BaseType->isFloat32Type(); }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy, - checkFloatorHalf); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy, + checkFloatorHalf); +} + +static bool CheckModifiableLValue(Sema *S, CallExpr *TheCall, + unsigned ArgIndex) { + auto *Arg = TheCall->getArg(ArgIndex); + SourceLocation OrigLoc = Arg->getExprLoc(); + if (Arg->IgnoreCasts()->isModifiableLvalue(S->Context, &OrigLoc) == + Expr::MLV_Valid) + return false; + S->Diag(OrigLoc, diag::error_hlsl_inout_lvalue) << Arg << 0; + return true; } static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) { @@ -1742,24 +1762,24 @@ static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) { return VecTy->getElementType()->isDoubleType(); return false; }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy, - checkDoubleVector); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy, + checkDoubleVector); } static bool CheckFloatingOrIntRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllSignedTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasIntegerRepresentation() && !PassedType->hasFloatingRepresentation(); }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.IntTy, - checkAllSignedTypes); + return 
CheckAllArgTypesAreCorrect(S, TheCall, S->Context.IntTy, + checkAllSignedTypes); } static bool CheckUnsignedIntRepresentation(Sema *S, CallExpr *TheCall) { auto checkAllUnsignedTypes = [](clang::QualType PassedType) -> bool { return !PassedType->hasUnsignedIntegerRepresentation(); }; - return CheckArgsTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy, - checkAllUnsignedTypes); + return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy, + checkAllUnsignedTypes); } static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall, @@ -2074,6 +2094,22 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return true; break; } + case Builtin::BI__builtin_hlsl_elementwise_splitdouble: { + if (SemaRef.checkArgCount(TheCall, 3)) + return true; + + if (CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.DoubleTy, 0) || + CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.UnsignedIntTy, + 1) || + CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.UnsignedIntTy, + 2)) + return true; + + if (CheckModifiableLValue(&SemaRef, TheCall, 1) || + CheckModifiableLValue(&SemaRef, TheCall, 2)) + return true; + break; + } case Builtin::BI__builtin_elementwise_acos: case Builtin::BI__builtin_elementwise_asin: case Builtin::BI__builtin_elementwise_atan: diff --git a/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl b/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl new file mode 100644 index 000000000000000..a883c9d5cc3555e --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-library %s -fnative-half-type -emit-llvm -O0 -o - | FileCheck %s --check-prefix=SPIRV + + + +// CHECK: define {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]]) +// CHECK: [[VALRET:%.*]] = {{.*}} 
call { i32, i32 } @llvm.dx.splitdouble.i32(double [[VALD]]) +// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load double, ptr [[VALD]].addr, align 8 +// SPIRV-NEXT: [[CAST:%.*]] = bitcast double [[LOAD]] to <2 x i32> +// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 0 +// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 1 +uint test_scalar(double D) { + uint A, B; + asuint(D, A, B); + return A + B; +} + +// CHECK: define {{.*}} <1 x i32> {{.*}}test_double1{{.*}}(<1 x double> {{.*}} [[VALD:%.*]]) +// CHECK: [[TRUNC:%.*]] = extractelement <1 x double> %D, i64 0 +// CHECK-NEXT: [[VALRET:%.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[TRUNC]]) +// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} <1 x i32> {{.*}}test_double1{{.*}}(<1 x double> {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load <1 x double>, ptr [[VALD]].addr, align 8 +// SPIRV-NEXT: [[TRUNC:%.*]] = extractelement <1 x double> [[LOAD]], i64 0 +// SPIRV-NEXT: [[CAST:%.*]] = bitcast double [[TRUNC]] to <2 x i32> +// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 0 +// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 1 +uint1 test_double1(double1 D) { + uint A, B; + asuint(D, A, B); + return A + B; +} + +// CHECK: define {{.*}} <2 x i32> {{.*}}test_vector2{{.*}}(<2 x double> {{.*}} [[VALD:%.*]]) +// CHECK: [[VALRET:%.*]] = {{.*}} call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> [[VALD]]) +// CHECK-NEXT: extractvalue { <2 x i32>, <2 x i32> } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { <2 x i32>, <2 x i32> } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} <2 x i32> {{.*}}test_vector2{{.*}}(<2 x 
double> {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load <2 x double>, ptr [[VALD]].addr, align 16 +// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <2 x double> [[LOAD]] to <4 x i32> +// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> +// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> +uint2 test_vector2(double2 D) { + uint2 A, B; + asuint(D, A, B); + return A + B; +} + +// CHECK: define {{.*}} <3 x i32> {{.*}}test_vector3{{.*}}(<3 x double> {{.*}} [[VALD:%.*]]) +// CHECK: [[VALRET:%.*]] = {{.*}} call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> [[VALD]]) +// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} <3 x i32> {{.*}}test_vector3{{.*}}(<3 x double> {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load <3 x double>, ptr [[VALD]].addr, align 32 +// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <3 x double> [[LOAD]] to <6 x i32> +// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <6 x i32> [[CAST1]], <6 x i32> poison, <3 x i32> +// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <6 x i32> [[CAST1]], <6 x i32> poison, <3 x i32> +uint3 test_vector3(double3 D) { + uint3 A, B; + asuint(D, A, B); + return A + B; +} + +// CHECK: define {{.*}} <4 x i32> {{.*}}test_vector4{{.*}}(<4 x double> {{.*}} [[VALD:%.*]]) +// CHECK: [[VALRET:%.*]] = {{.*}} call { <4 x i32>, <4 x i32> } @llvm.dx.splitdouble.v4i32(<4 x double> [[VALD]]) +// CHECK-NEXT: extractvalue { <4 x i32>, <4 x i32> } [[VALRET]], 0 +// CHECK-NEXT: extractvalue { <4 x i32>, <4 x i32> } [[VALRET]], 1 +// +// SPIRV: define spir_func {{.*}} <4 x i32> {{.*}}test_vector4{{.*}}(<4 x double> {{.*}} [[VALD:%.*]]) +// SPIRV-NOT: @llvm.dx.splitdouble.i32 +// SPIRV: [[LOAD:%.*]] = load <4 x double>, ptr [[VALD]].addr, align 32 +// SPIRV-NEXT: 
[[CAST1:%.*]] = bitcast <4 x double> [[LOAD]] to <8 x i32> +// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <8 x i32> [[CAST1]], <8 x i32> poison, <4 x i32> +// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <8 x i32> [[CAST1]], <8 x i32> poison, <4 x i32> +uint4 test_vector4(double4 D) { + uint4 A, B; + asuint(D, A, B); + return A + B; +} diff --git a/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl index 8c56fdddb1c24c5..4adb0555c35be60 100644 --- a/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl @@ -6,6 +6,10 @@ uint4 test_asuint_too_many_arg(float p0, float p1) { // expected-error@-1 {{no matching function for call to 'asuint'}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'V', but 2 arguments were provided}} // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'F', but 2 arguments were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} + // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}} } uint test_asuint_double(double p1) { @@ -23,3 +27,29 @@ uint test_asuint_half(half p1) { // expected-note@hlsl/hlsl_detail.h:* {{candidate template ignored: could not match 'vector' against 'half'}} // expected-note@hlsl/hlsl_detail.h:* {{candidate template ignored: substitution failure [with U = uint, T = half]: no type named 'Type'}} } + +void test_asuint_first_arg_const(double D) { + const uint A = 0; + uint B; + asuint(D, A, B); + // expected-error@hlsl/hlsl_intrinsics.h:* 
{{read-only variable is not assignable}} +} + +void test_asuint_second_arg_const(double D) { + const uint A = 0; + uint B; + asuint(D, B, A); + // expected-error@hlsl/hlsl_intrinsics.h:* {{read-only variable is not assignable}} +} + +void test_asuint_imidiate_value(double D) { + uint B; + asuint(D, B, 1); + // expected-error@-1 {{cannot bind non-lvalue argument 1 to out paramemter}} +} + +void test_asuint_expr(double D) { + uint B; + asuint(D, B, B + 1); + // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl new file mode 100644 index 000000000000000..18d2b692b335b9b --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl @@ -0,0 +1,76 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify + +void test_no_second_arg(double D) { + __builtin_hlsl_elementwise_splitdouble(D); + // expected-error@-1 {{too few arguments to function call, expected 3, have 1}} +} + +void test_no_third_arg(double D) { + uint A; + __builtin_hlsl_elementwise_splitdouble(D, A); + // expected-error@-1 {{too few arguments to function call, expected 3, have 2}} +} + +void test_too_many_arg(double D) { + uint A, B, C; + __builtin_hlsl_elementwise_splitdouble(D, A, B, C); + // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} +} + +void test_first_arg_type_mismatch(bool3 D) { + uint3 A, B; + __builtin_hlsl_elementwise_splitdouble(D, A, B); + // expected-error@-1 {{invalid operand of type 'bool3' (aka 'vector') where 'double' or a vector of such type is required}} +} + +void test_second_arg_type_mismatch(double D) { + bool A; + uint B; + __builtin_hlsl_elementwise_splitdouble(D, A, B); + // expected-error@-1 {{invalid operand of type 'bool' where 'unsigned int' or a vector of such type is required}} +} + +void test_third_arg_type_mismatch(double 
D) { + bool A; + uint B; + __builtin_hlsl_elementwise_splitdouble(D, B, A); + // expected-error@-1 {{invalid operand of type 'bool' where 'unsigned int' or a vector of such type is required}} +} + +void test_const_second_arg(double D) { + const uint A = 1; + uint B; + __builtin_hlsl_elementwise_splitdouble(D, A, B); + // expected-error@-1 {{cannot bind non-lvalue argument A to out paramemter}} +} + +void test_const_third_arg(double D) { + uint A; + const uint B = 1; + __builtin_hlsl_elementwise_splitdouble(D, A, B); + // expected-error@-1 {{cannot bind non-lvalue argument B to out paramemter}} +} + +void test_number_second_arg(double D) { + uint B; + __builtin_hlsl_elementwise_splitdouble(D, (uint)1, B); + // expected-error@-1 {{cannot bind non-lvalue argument (uint)1 to out paramemter}} +} + +void test_number_third_arg(double D) { + uint B; + __builtin_hlsl_elementwise_splitdouble(D, B, (uint)1); + // expected-error@-1 {{cannot bind non-lvalue argument (uint)1 to out paramemter}} +} + +void test_expr_second_arg(double D) { + uint B; + __builtin_hlsl_elementwise_splitdouble(D, B+1, B); + // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} +} + +void test_expr_third_arg(double D) { + uint B; + __builtin_hlsl_elementwise_splitdouble(D, B, B+1); + // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} +} diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 147b32b1ca99030..68ae5de06423c2a 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -47,6 +47,7 @@ def ResRetInt32Ty : DXILOpParamType; def HandleTy : DXILOpParamType; def ResBindTy : DXILOpParamType; def ResPropsTy : DXILOpParamType; +def SplitDoubleTy : DXILOpParamType; class DXILOpClass; @@ -779,6 +780,15 @@ def FlattenedThreadIdInGroup : DXILOp<96, flattenedThreadIdInGroup> { let attributes = [Attributes]; } +def SplitDouble : DXILOp<102, splitDouble> { + let Doc = "Splits a double into 2 
uints"; + let arguments = [OverloadTy]; + let result = SplitDoubleTy; + let overloads = [Overloads]; + let stages = [Stages]; + let attributes = [Attributes]; +} + def AnnotateHandle : DXILOp<217, annotateHandle> { let Doc = "annotate handle with resource properties"; let arguments = [HandleTy, ResPropsTy]; diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index 7719d6b1079110b..5d5bb3eacace258 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -229,6 +229,13 @@ static StructType *getResPropsType(LLVMContext &Context) { return StructType::create({Int32Ty, Int32Ty}, "dx.types.ResourceProperties"); } +static StructType *getSplitDoubleType(LLVMContext &Context) { + if (auto *ST = StructType::getTypeByName(Context, "dx.types.splitdouble")) + return ST; + Type *Int32Ty = Type::getInt32Ty(Context); + return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble"); +} + static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, Type *OverloadTy) { switch (Kind) { @@ -266,6 +273,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, return getResBindType(Ctx); case OpParamType::ResPropsTy: return getResPropsType(Ctx); + case OpParamType::SplitDoubleTy: + return getSplitDoubleType(Ctx); } llvm_unreachable("Invalid parameter kind"); return nullptr; @@ -467,6 +476,10 @@ StructType *DXILOpBuilder::getResRetType(Type *ElementTy) { return ::getResRetType(ElementTy); } +StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) { + return ::getSplitDoubleType(Context); +} + StructType *DXILOpBuilder::getHandleType() { return ::getHandleType(IRB.getContext()); } diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h index 037ae3822cfb906..df5a0240870f4a4 100644 --- a/llvm/lib/Target/DirectX/DXILOpBuilder.h +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h @@ -49,6 +49,10 @@ class DXILOpBuilder 
{ /// Get a `%dx.types.ResRet` type with the given element type. StructType *getResRetType(Type *ElementTy); + + /// Get the `%dx.types.splitdouble` type. + StructType *getSplitDoubleType(LLVMContext &Context); + /// Get the `%dx.types.Handle` type. StructType *getHandleType(); diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index c62ba8c21d67917..f7722d77074764d 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/Module.h" @@ -128,6 +129,30 @@ class OpLowerer { }); } + [[nodiscard]] bool replaceFunctionWithNamedStructOp( + Function &F, dxil::OpCode DXILOp, Type *NewRetTy, + llvm::function_ref ReplaceUses) { + bool IsVectorArgExpansion = isVectorArgExpansion(F); + return replaceFunction(F, [&](CallInst *CI) -> Error { + SmallVector Args; + OpBuilder.getIRB().SetInsertPoint(CI); + if (IsVectorArgExpansion) { + SmallVector NewArgs = argVectorFlatten(CI, OpBuilder.getIRB()); + Args.append(NewArgs.begin(), NewArgs.end()); + } else + Args.append(CI->arg_begin(), CI->arg_end()); + + Expected OpCall = + OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), NewRetTy); + if (Error E = OpCall.takeError()) + return E; + if (Error E = ReplaceUses(CI, *OpCall)) + return E; + + return Error::success(); + }); + } + /// Create a cast between a `target("dx")` type and `dx.types.Handle`, which /// is intended to be removed by the end of lowering. 
This is used to allow /// lowering of ops which need to change their return or argument types in a @@ -263,6 +288,26 @@ class OpLowerer { return lowerToBindAndAnnotateHandle(F); } + Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) { + for (Use &U : make_early_inc_range(Intrin->uses())) { + if (auto *EVI = dyn_cast(U.getUser())) { + + if (EVI->getNumIndices() != 1) + return createStringError(std::errc::invalid_argument, + "Splitdouble has only 2 elements"); + EVI->setOperand(0, Op); + } else { + return make_error( + "Splitdouble use is not ExtractValueInst", + inconvertibleErrorCode()); + } + } + + Intrin->eraseFromParent(); + + return Error::success(); + } + /// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op. /// Since we expect to be post-scalarization, make an effort to avoid vectors. Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) { @@ -488,6 +533,16 @@ class OpLowerer { case Intrinsic::dx_typedBufferStore: HasErrors |= lowerTypedBufferStore(F); break; + // TODO: this can be removed when + // https://github.com/llvm/llvm-project/issues/113192 is fixed + case Intrinsic::dx_splitdouble: + HasErrors |= replaceFunctionWithNamedStructOp( + F, OpCode::SplitDouble, + OpBuilder.getSplitDoubleType(M.getContext()), + [&](CallInst *CI, CallInst *Op) { + return replaceSplitDoubleCallUsages(CI, Op); + }); + break; } Updated = true; } diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll deleted file mode 100644 index 759590fa56279b1..000000000000000 --- a/llvm/test/CodeGen/DirectX/split-double.ll +++ /dev/null @@ -1,45 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s - -define void @test_vector_double_split_void(<2 x double> noundef %d) { -; CHECK-LABEL: define void @test_vector_double_split_void( -; 
CHECK-SAME: <2 x double> noundef [[D:%.*]]) { -; CHECK-NEXT: [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0 -; CHECK-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) -; CHECK-NEXT: [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1 -; CHECK-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) -; CHECK-NEXT: ret void -; - %hlsl.asuint = call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> %d) - ret void -} - -define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) { -; CHECK-LABEL: define noundef <3 x i32> @test_vector_double_split( -; CHECK-SAME: <3 x double> noundef [[D:%.*]]) { -; CHECK-NEXT: [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0 -; CHECK-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) -; CHECK-NEXT: [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1 -; CHECK-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) -; CHECK-NEXT: [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2 -; CHECK-NEXT: [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]]) -; CHECK-NEXT: [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0 -; CHECK-NEXT: [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0 -; CHECK-NEXT: [[DOTELEM02:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 0 -; CHECK-NEXT: [[DOTELEM1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1 -; CHECK-NEXT: [[DOTELEM13:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 1 -; CHECK-NEXT: [[DOTELEM14:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 1 -; CHECK-NEXT: [[DOTI0:%.*]] = add i32 [[DOTELEM0]], [[DOTELEM1]] -; CHECK-NEXT: [[DOTI1:%.*]] = add i32 [[DOTELEM01]], [[DOTELEM13]] -; CHECK-NEXT: [[DOTI2:%.*]] = add i32 [[DOTELEM02]], [[DOTELEM14]] -; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <3 x i32> poison, 
i32 [[DOTI0]], i64 0 -; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <3 x i32> [[DOTUPTO015]], i32 [[DOTI1]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> [[DOTUPTO116]], i32 [[DOTI2]], i64 2 -; CHECK-NEXT: ret <3 x i32> [[TMP1]] -; - %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) - %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0 - %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1 - %3 = add <3 x i32> %1, %2 - ret <3 x i32> %3 -} diff --git a/llvm/test/CodeGen/DirectX/splitdouble.ll b/llvm/test/CodeGen/DirectX/splitdouble.ll new file mode 100644 index 000000000000000..1443ba6269255a9 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/splitdouble.ll @@ -0,0 +1,76 @@ +; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,NOLOWER +; RUN: opt -passes='function(scalarizer),module(dxil-op-lower)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,WITHLOWER + +define i32 @test_scalar(double noundef %D) { +; CHECK-LABEL: define i32 @test_scalar( +; CHECK-SAME: double noundef [[D:%.*]]) { +; NOLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D]]) +; NOLOWER-NEXT: [[EV1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0 +; NOLOWER-NEXT: [[EV2:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1 +; WITHLOWER-NEXT: [[EV1:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0 +; WITHLOWER-NEXT: [[EV2:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[EV1]], [[EV2]] +; CHECK-NEXT: ret i32 [[ADD]] +; + %hlsl.splitdouble = call { i32, i32 } @llvm.dx.splitdouble.i32(double %D) + %1 = extractvalue { i32, i32 } %hlsl.splitdouble, 0 + %2 = extractvalue { i32, i32 } %hlsl.splitdouble, 1 
+ %add = add i32 %1, %2 + ret i32 %add +} + + +define void @test_vector_double_split_void(<2 x double> noundef %d) { +; CHECK-LABEL: define void @test_vector_double_split_void( +; CHECK-SAME: <2 x double> noundef [[D:%.*]]) { +; CHECK-NEXT: [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0 +; NOLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]]) +; CHECK-NEXT: [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1 +; NOLOWER-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]]) +; CHECK-NEXT: ret void +; + %hlsl.asuint = call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> %d) + ret void +} + +define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) { +; CHECK-LABEL: define noundef <3 x i32> @test_vector_double_split( +; CHECK-SAME: <3 x double> noundef [[D:%.*]]) { +; CHECK-NEXT: [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0 +; NOLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]]) +; CHECK-NEXT: [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1 +; NOLOWER-NEXT: [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]]) +; CHECK-NEXT: [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2 +; NOLOWER-NEXT: [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]]) +; WITHLOWER-NEXT: [[HLSL_ASUINT_I2:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 
102, double [[D_I2]]) +; NOLOWER-NEXT: [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0 +; WITHLOWER-NEXT: [[DOTELEM0:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0 +; NOLOWER-NEXT: [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0 +; WITHLOWER-NEXT: [[DOTELEM01:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I1]], 0 +; NOLOWER-NEXT: [[DOTELEM02:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 0 +; WITHLOWER-NEXT: [[DOTELEM02:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I2]], 0 +; NOLOWER-NEXT: [[DOTELEM1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1 +; WITHLOWER-NEXT: [[DOTELEM1:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 1 +; NOLOWER-NEXT: [[DOTELEM13:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 1 +; WITHLOWER-NEXT: [[DOTELEM13:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I1]], 1 +; NOLOWER-NEXT: [[DOTELEM14:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 1 +; WITHLOWER-NEXT: [[DOTELEM14:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I2]], 1 +; CHECK-NEXT: [[DOTI0:%.*]] = add i32 [[DOTELEM0]], [[DOTELEM1]] +; CHECK-NEXT: [[DOTI1:%.*]] = add i32 [[DOTELEM01]], [[DOTELEM13]] +; CHECK-NEXT: [[DOTI2:%.*]] = add i32 [[DOTELEM02]], [[DOTELEM14]] +; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <3 x i32> poison, i32 [[DOTI0]], i64 0 +; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <3 x i32> [[DOTUPTO015]], i32 [[DOTI1]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> [[DOTUPTO116]], i32 [[DOTI2]], i64 2 +; CHECK-NEXT: ret <3 x i32> [[TMP1]] +; + %hlsl.asuint = call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> %d) + %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0 + %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1 + %3 = add <3 x i32> %1, %2 + ret <3 x i32> %3 +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll 
b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll new file mode 100644 index 000000000000000..d18b16b843c37bc --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll @@ -0,0 +1,40 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; Make sure lowering is correctly generating spirv code. + +; CHECK-DAG: %[[#double:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#vec_2_double:]] = OpTypeVector %[[#double]] 2 +; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#vec_2_int_32:]] = OpTypeVector %[[#int_32]] 2 +; CHECK-DAG: %[[#vec_4_int_32:]] = OpTypeVector %[[#int_32]] 4 + + +define spir_func noundef i32 @test_scalar(double noundef %D) local_unnamed_addr { +entry: + ; CHECK-LABEL: ; -- Begin function test_scalar + ; CHECK: %[[#param:]] = OpFunctionParameter %[[#double]] + ; CHECK: %[[#bitcast:]] = OpBitcast %[[#vec_2_int_32]] %[[#param]] + %0 = bitcast double %D to <2 x i32> + ; CHECK: %[[#]] = OpCompositeExtract %[[#int_32]] %[[#bitcast]] 0 + %1 = extractelement <2 x i32> %0, i64 0 + ; CHECK: %[[#]] = OpCompositeExtract %[[#int_32]] %[[#bitcast]] 1 + %2 = extractelement <2 x i32> %0, i64 1 + %add = add i32 %1, %2 + ret i32 %add +} + + +define spir_func noundef <2 x i32> @test_vector(<2 x double> noundef %D) local_unnamed_addr { +entry: + ; CHECK-LABEL: ; -- Begin function test_vector + ; CHECK: %[[#param:]] = OpFunctionParameter %[[#vec_2_double]] + ; CHECK: %[[#CAST1:]] = OpBitcast %[[#vec_4_int_32]] %[[#param]] + ; CHECK: %[[#SHUFF2:]] = OpVectorShuffle %[[#vec_2_int_32]] %[[#CAST1]] %[[#]] 0 2 + ; CHECK: %[[#SHUFF3:]] = OpVectorShuffle %[[#vec_2_int_32]] %[[#CAST1]] %[[#]] 1 3 + %0 = bitcast <2 x double> %D to <4 x i32> + %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> + %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> + %add = add <2 x i32> %1, %2 + ret <2 x i32> %add 
+} From abc49cc19463970d5523d7d3332e4c1f83bc2ef7 Mon Sep 17 00:00:00 2001 From: Job Henandez Lara Date: Mon, 28 Oct 2024 13:29:16 -0700 Subject: [PATCH 216/425] [libc] remove #include and add proxy or type (#113836) --- libc/hdr/CMakeLists.txt | 3 ++ libc/hdr/fcntl_macros.h | 2 +- libc/hdr/fcntl_overlay.h | 37 +++++++++++++++++++ libc/hdr/types/CMakeLists.txt | 11 ++++++ libc/hdr/types/mode_t.h | 22 +++++++++++ libc/src/__support/File/linux/CMakeLists.txt | 4 +- libc/src/__support/File/linux/dir.cpp | 2 +- libc/src/__support/File/linux/file.cpp | 2 +- .../__support/threads/linux/CMakeLists.txt | 2 +- libc/src/__support/threads/linux/thread.cpp | 2 +- libc/src/fcntl/creat.h | 2 +- libc/src/fcntl/linux/CMakeLists.txt | 8 ++-- libc/src/fcntl/linux/creat.cpp | 2 +- libc/src/fcntl/linux/open.cpp | 2 +- libc/src/fcntl/linux/openat.cpp | 2 +- libc/src/fcntl/open.h | 2 +- libc/src/fcntl/openat.h | 2 +- libc/src/spawn/linux/CMakeLists.txt | 2 +- libc/src/spawn/linux/posix_spawn.cpp | 2 +- libc/src/stdio/linux/CMakeLists.txt | 2 +- libc/src/stdio/linux/remove.cpp | 2 +- libc/src/sys/stat/linux/CMakeLists.txt | 11 +++--- libc/src/sys/stat/linux/chmod.cpp | 2 +- libc/src/sys/stat/linux/fchmod.cpp | 2 +- libc/src/sys/stat/linux/fstat.cpp | 2 +- libc/src/sys/stat/linux/lstat.cpp | 2 +- libc/src/sys/stat/linux/mkdir.cpp | 2 +- libc/src/sys/stat/linux/stat.cpp | 2 +- libc/src/unistd/linux/CMakeLists.txt | 21 ++++++----- libc/src/unistd/linux/access.cpp | 2 +- libc/src/unistd/linux/dup2.cpp | 2 +- libc/src/unistd/linux/link.cpp | 2 +- libc/src/unistd/linux/linkat.cpp | 2 +- libc/src/unistd/linux/readlink.cpp | 2 +- libc/src/unistd/linux/readlinkat.cpp | 2 +- libc/src/unistd/linux/rmdir.cpp | 2 +- libc/src/unistd/linux/symlink.cpp | 2 +- libc/src/unistd/linux/symlinkat.cpp | 2 +- libc/src/unistd/linux/unlink.cpp | 2 +- libc/src/unistd/linux/unlinkat.cpp | 2 +- libc/test/src/fcntl/CMakeLists.txt | 2 +- libc/test/src/fcntl/openat_test.cpp | 2 +- 
libc/test/src/sys/sendfile/CMakeLists.txt | 2 +- libc/test/src/sys/sendfile/sendfile_test.cpp | 2 +- libc/test/src/sys/stat/CMakeLists.txt | 14 +++---- libc/test/src/sys/stat/chmod_test.cpp | 2 +- libc/test/src/sys/stat/fchmod_test.cpp | 2 +- libc/test/src/sys/stat/fchmodat_test.cpp | 2 +- libc/test/src/sys/stat/fstat_test.cpp | 2 +- libc/test/src/sys/stat/lstat_test.cpp | 2 +- libc/test/src/sys/stat/mkdirat_test.cpp | 2 +- libc/test/src/sys/stat/stat_test.cpp | 2 +- libc/test/src/unistd/CMakeLists.txt | 9 +++-- libc/test/src/unistd/chdir_test.cpp | 2 +- libc/test/src/unistd/fchdir_test.cpp | 2 +- libc/test/src/unistd/readlinkat_test.cpp | 2 +- libc/test/src/unistd/rmdir_test.cpp | 2 +- libc/test/src/unistd/syscall_test.cpp | 2 +- 58 files changed, 156 insertions(+), 80 deletions(-) create mode 100644 libc/hdr/fcntl_overlay.h create mode 100644 libc/hdr/types/mode_t.h diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt index 13dc892978bb87a..80545ee4b359f4e 100644 --- a/libc/hdr/CMakeLists.txt +++ b/libc/hdr/CMakeLists.txt @@ -51,10 +51,13 @@ add_proxy_header_library( libc.include.llvm-libc-macros.generic_error_number_macros ) +add_header_library(fcntl_overlay HDRS fcntl_overlay.h) add_proxy_header_library( fcntl_macros HDRS fcntl_macros.h + DEPENDS + .fcntl_overlay FULL_BUILD_DEPENDS libc.include.llvm-libc-macros.fcntl_macros libc.include.fcntl diff --git a/libc/hdr/fcntl_macros.h b/libc/hdr/fcntl_macros.h index 828cb984c0cb148..3a1ddeb0a2da1d9 100644 --- a/libc/hdr/fcntl_macros.h +++ b/libc/hdr/fcntl_macros.h @@ -15,7 +15,7 @@ #else // Overlay mode -#include +#include "hdr/fcntl_overlay.h" #endif // LLVM_LIBC_FULL_BUILD diff --git a/libc/hdr/fcntl_overlay.h b/libc/hdr/fcntl_overlay.h new file mode 100644 index 000000000000000..c1cc98b0ebb2c7e --- /dev/null +++ b/libc/hdr/fcntl_overlay.h @@ -0,0 +1,37 @@ +//===-- Including fcntl.h in overlay mode ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_FCNTL_OVERLAY_H +#define LLVM_LIBC_HDR_FCNTL_OVERLAY_H + +#ifdef LIBC_FULL_BUILD +#error "This header should only be included in overlay mode" +#endif + +// Overlay mode + +// glibc header might provide extern inline definitions for few +// functions, causing external alias errors. They are guarded by +// `__USE_FORTIFY_LEVEL`, which will be temporarily disabled +// with `_FORTIFY_SOURCE`. + +#ifdef __USE_FORTIFY_LEVEL +#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL +#undef __USE_FORTIFY_LEVEL +#define __USE_FORTIFY_LEVEL 0 +#endif + +#include + +#ifdef LIBC_OLD_USE_FORTIFY_LEVEL +#undef __USE_FORTIFY_LEVEL +#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL +#undef LIBC_OLD_USE_FORTIFY_LEVEL +#endif + +#endif // LLVM_LIBC_HDR_FCNTL_OVERLAY_H diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt index fab5245816bbe19..e45979857d79554 100644 --- a/libc/hdr/types/CMakeLists.txt +++ b/libc/hdr/types/CMakeLists.txt @@ -46,6 +46,17 @@ add_proxy_header_library( libc.include.llvm-libc-types.struct_timespec ) +add_proxy_header_library( + mode_t + HDRS + mode_t.h + DEPENDS + ../fcntl_overlay + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.mode_t + libc.include.fcntl +) + add_proxy_header_library( fenv_t HDRS diff --git a/libc/hdr/types/mode_t.h b/libc/hdr/types/mode_t.h new file mode 100644 index 000000000000000..abbbdb0a09d7b63 --- /dev/null +++ b/libc/hdr/types/mode_t.h @@ -0,0 +1,22 @@ +//===-- Definition of macros from mode_t.h --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_MODE_T_H +#define LLVM_LIBC_HDR_MODE_T_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/mode_t.h" + +#else // Overlay mode + +#include "hdr/fcntl_overlay.h" + +#endif // LLVM_LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_MODE_T_H diff --git a/libc/src/__support/File/linux/CMakeLists.txt b/libc/src/__support/File/linux/CMakeLists.txt index 5abbf11b3671cd2..84e3d5608361e19 100644 --- a/libc/src/__support/File/linux/CMakeLists.txt +++ b/libc/src/__support/File/linux/CMakeLists.txt @@ -7,7 +7,7 @@ add_object_library( file.h lseekImpl.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_syscall libc.include.sys_stat libc.src.__support.CPP.new @@ -55,7 +55,7 @@ add_object_library( SRCS dir.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_syscall libc.src.__support.OSUtil.osutil libc.src.__support.error_or diff --git a/libc/src/__support/File/linux/dir.cpp b/libc/src/__support/File/linux/dir.cpp index fc90ff097e4606d..5fe44fa8297b68d 100644 --- a/libc/src/__support/File/linux/dir.cpp +++ b/libc/src/__support/File/linux/dir.cpp @@ -12,7 +12,7 @@ #include "src/__support/error_or.h" #include "src/__support/macros/config.h" -#include // For open flags +#include "hdr/fcntl_macros.h" // For open flags #include // For syscall numbers namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/__support/File/linux/file.cpp b/libc/src/__support/File/linux/file.cpp index 22292336f300e21..824c1f200e8c5b1 100644 --- a/libc/src/__support/File/linux/file.cpp +++ b/libc/src/__support/File/linux/file.cpp @@ -18,7 +18,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" // For error macros -#include // For mode_t and other flags to the open syscall +#include "hdr/fcntl_macros.h" // For mode_t and other flags to the open syscall #include // For 
S_IS*, S_IF*, and S_IR* flags. #include // For syscall numbers diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index b6796f40adce7bd..fa11458f99b6c9c 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -79,7 +79,7 @@ add_object_library( .futex_utils libc.config.app_h libc.include.sys_syscall - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.errno.errno libc.src.__support.CPP.atomic libc.src.__support.CPP.stringstream diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp index ee3f63fa3cde32e..c531d74c533550d 100644 --- a/libc/src/__support/threads/linux/thread.cpp +++ b/libc/src/__support/threads/linux/thread.cpp @@ -22,7 +22,7 @@ #include #endif -#include +#include "hdr/fcntl_macros.h" #include // For EXEC_PAGESIZE. #include // For PR_SET_NAME #include // For CLONE_* flags. diff --git a/libc/src/fcntl/creat.h b/libc/src/fcntl/creat.h index e180e17c2578870..3e00427638a36a4 100644 --- a/libc/src/fcntl/creat.h +++ b/libc/src/fcntl/creat.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_FCNTL_CREAT_H #define LLVM_LIBC_SRC_FCNTL_CREAT_H +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/fcntl/linux/CMakeLists.txt b/libc/src/fcntl/linux/CMakeLists.txt index ee8ae63b8cf062a..ecfb2cdd3f3361c 100644 --- a/libc/src/fcntl/linux/CMakeLists.txt +++ b/libc/src/fcntl/linux/CMakeLists.txt @@ -5,7 +5,7 @@ add_entrypoint_object( HDRS ../creat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -17,7 +17,7 @@ add_entrypoint_object( HDRS ../fcntl.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.__support.OSUtil.osutil ) @@ -28,7 +28,7 @@ add_entrypoint_object( HDRS ../open.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t 
libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -40,7 +40,7 @@ add_entrypoint_object( HDRS ../openat.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/fcntl/linux/creat.cpp b/libc/src/fcntl/linux/creat.cpp index 2c5b5d736a3be3f..23abae243aed92c 100644 --- a/libc/src/fcntl/linux/creat.cpp +++ b/libc/src/fcntl/linux/creat.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/fcntl/linux/open.cpp b/libc/src/fcntl/linux/open.cpp index 79b7b2b32c887bb..9f3d035388e8652 100644 --- a/libc/src/fcntl/linux/open.cpp +++ b/libc/src/fcntl/linux/open.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/types/mode_t.h" #include #include // For syscall numbers. diff --git a/libc/src/fcntl/linux/openat.cpp b/libc/src/fcntl/linux/openat.cpp index 0862082c22ebfcd..6063d9c00ad6c40 100644 --- a/libc/src/fcntl/linux/openat.cpp +++ b/libc/src/fcntl/linux/openat.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/types/mode_t.h" #include #include // For syscall numbers. 
diff --git a/libc/src/fcntl/open.h b/libc/src/fcntl/open.h index 19bb53c2e320319..11f0ae53795318c 100644 --- a/libc/src/fcntl/open.h +++ b/libc/src/fcntl/open.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_FCNTL_OPEN_H #define LLVM_LIBC_SRC_FCNTL_OPEN_H +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/fcntl/openat.h b/libc/src/fcntl/openat.h index d09791a84f73529..051c8a2304dcbac 100644 --- a/libc/src/fcntl/openat.h +++ b/libc/src/fcntl/openat.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_FCNTL_OPENAT_H #define LLVM_LIBC_SRC_FCNTL_OPENAT_H +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/spawn/linux/CMakeLists.txt b/libc/src/spawn/linux/CMakeLists.txt index 9ef3a9d18b0c690..5f7ab4c43165dcc 100644 --- a/libc/src/spawn/linux/CMakeLists.txt +++ b/libc/src/spawn/linux/CMakeLists.txt @@ -5,7 +5,7 @@ add_entrypoint_object( HDRS ../posix_spawn.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t libc.include.spawn libc.include.sys_syscall libc.include.signal diff --git a/libc/src/spawn/linux/posix_spawn.cpp b/libc/src/spawn/linux/posix_spawn.cpp index 4c0469b3ce384a2..d6caf8b374a02bc 100644 --- a/libc/src/spawn/linux/posix_spawn.cpp +++ b/libc/src/spawn/linux/posix_spawn.cpp @@ -14,7 +14,7 @@ #include "src/__support/macros/config.h" #include "src/spawn/file_actions.h" -#include +#include "hdr/types/mode_t.h" #include // For SIGCHLD #include #include // For syscall numbers. 
diff --git a/libc/src/stdio/linux/CMakeLists.txt b/libc/src/stdio/linux/CMakeLists.txt index d6241e1ca0439d8..e81642dc6f01e6f 100644 --- a/libc/src/stdio/linux/CMakeLists.txt +++ b/libc/src/stdio/linux/CMakeLists.txt @@ -5,7 +5,7 @@ add_entrypoint_object( HDRS ../remove.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil diff --git a/libc/src/stdio/linux/remove.cpp b/libc/src/stdio/linux/remove.cpp index 9e299aaf43e450b..dbb4491d0e6cc1c 100644 --- a/libc/src/stdio/linux/remove.cpp +++ b/libc/src/stdio/linux/remove.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" // For AT_* macros. #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include // For AT_* macros. #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sys/stat/linux/CMakeLists.txt b/libc/src/sys/stat/linux/CMakeLists.txt index 415d2fa5c87715d..7c9496b6b6e8c6d 100644 --- a/libc/src/sys/stat/linux/CMakeLists.txt +++ b/libc/src/sys/stat/linux/CMakeLists.txt @@ -5,7 +5,7 @@ add_entrypoint_object( HDRS ../chmod.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t libc.include.sys_stat libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -19,6 +19,7 @@ add_entrypoint_object( HDRS ../fchmod.h DEPENDS + libc.hdr.types.mode_t libc.include.sys_stat libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -45,7 +46,7 @@ add_entrypoint_object( HDRS ../mkdir.h DEPENDS - libc.include.fcntl + libc.hdr.types.mode_t libc.include.sys_stat libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -84,7 +85,7 @@ add_entrypoint_object( ../stat.h DEPENDS .kernel_statx - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno ) @@ -97,7 +98,7 @@ add_entrypoint_object( ../lstat.h DEPENDS .kernel_statx - libc.include.fcntl + 
libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno ) @@ -110,7 +111,7 @@ add_entrypoint_object( ../fstat.h DEPENDS .kernel_statx - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno ) diff --git a/libc/src/sys/stat/linux/chmod.cpp b/libc/src/sys/stat/linux/chmod.cpp index c91cabb514a8c9d..9d2860391818109 100644 --- a/libc/src/sys/stat/linux/chmod.cpp +++ b/libc/src/sys/stat/linux/chmod.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include #include // For syscall numbers. diff --git a/libc/src/sys/stat/linux/fchmod.cpp b/libc/src/sys/stat/linux/fchmod.cpp index 7b6c7b7091a8239..0d6fd359169aaff 100644 --- a/libc/src/sys/stat/linux/fchmod.cpp +++ b/libc/src/sys/stat/linux/fchmod.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include #include // For syscall numbers. diff --git a/libc/src/sys/stat/linux/fstat.cpp b/libc/src/sys/stat/linux/fstat.cpp index 411aa47bcda2ad3..35cf8f08f782d25 100644 --- a/libc/src/sys/stat/linux/fstat.cpp +++ b/libc/src/sys/stat/linux/fstat.cpp @@ -13,7 +13,7 @@ #include "src/__support/common.h" -#include +#include "hdr/fcntl_macros.h" #include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sys/stat/linux/lstat.cpp b/libc/src/sys/stat/linux/lstat.cpp index 5a6eff068d1dd79..354c5b6e029a44b 100644 --- a/libc/src/sys/stat/linux/lstat.cpp +++ b/libc/src/sys/stat/linux/lstat.cpp @@ -14,7 +14,7 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
#include "src/__support/common.h" -#include +#include "hdr/fcntl_macros.h" #include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sys/stat/linux/mkdir.cpp b/libc/src/sys/stat/linux/mkdir.cpp index 527c3d2058d2b72..bd6efef858c7b6c 100644 --- a/libc/src/sys/stat/linux/mkdir.cpp +++ b/libc/src/sys/stat/linux/mkdir.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include #include // For syscall numbers. diff --git a/libc/src/sys/stat/linux/stat.cpp b/libc/src/sys/stat/linux/stat.cpp index c5149e6e3c88393..de9cdb197d687ce 100644 --- a/libc/src/sys/stat/linux/stat.cpp +++ b/libc/src/sys/stat/linux/stat.cpp @@ -13,7 +13,7 @@ #include "src/__support/common.h" -#include +#include "hdr/fcntl_macros.h" #include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index 9b0d752cefbd8e5..472438ca72e49e4 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -5,6 +5,7 @@ add_entrypoint_object( HDRS ../access.h DEPENDS + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -57,7 +58,7 @@ add_entrypoint_object( HDRS ../dup2.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -254,7 +255,7 @@ add_entrypoint_object( HDRS ../link.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -268,7 +269,7 @@ add_entrypoint_object( HDRS ../linkat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -377,7 +378,7 @@ add_entrypoint_object( HDRS ../rmdir.h DEPENDS - libc.include.fcntl + 
libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -391,7 +392,7 @@ add_entrypoint_object( HDRS ../readlink.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -405,7 +406,7 @@ add_entrypoint_object( HDRS ../readlinkat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -419,7 +420,7 @@ add_entrypoint_object( HDRS ../symlink.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -433,7 +434,7 @@ add_entrypoint_object( HDRS ../symlinkat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -485,7 +486,7 @@ add_entrypoint_object( HDRS ../unlink.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -499,7 +500,7 @@ add_entrypoint_object( HDRS ../unlinkat.h DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_syscall libc.src.__support.OSUtil.osutil diff --git a/libc/src/unistd/linux/access.cpp b/libc/src/unistd/linux/access.cpp index e9ad74989b05636..2f7ebbcdf9e810d 100644 --- a/libc/src/unistd/linux/access.cpp +++ b/libc/src/unistd/linux/access.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. 
namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/dup2.cpp b/libc/src/unistd/linux/dup2.cpp index 51a19a71a7d854c..c7c7c1a8ca786f3 100644 --- a/libc/src/unistd/linux/dup2.cpp +++ b/libc/src/unistd/linux/dup2.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/link.cpp b/libc/src/unistd/linux/link.cpp index 37ca58eab1096dc..477806a70df7427 100644 --- a/libc/src/unistd/linux/link.cpp +++ b/libc/src/unistd/linux/link.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/linkat.cpp b/libc/src/unistd/linux/linkat.cpp index fcd6a5f75a196b4..40f68cc90c4809a 100644 --- a/libc/src/unistd/linux/linkat.cpp +++ b/libc/src/unistd/linux/linkat.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/readlink.cpp b/libc/src/unistd/linux/readlink.cpp index 7b152450044054a..2055e6b3400f2f5 100644 --- a/libc/src/unistd/linux/readlink.cpp +++ b/libc/src/unistd/linux/readlink.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. 
namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/readlinkat.cpp b/libc/src/unistd/linux/readlinkat.cpp index 19a9ff9fbeb72ab..e5e4d0d39bc9cf5 100644 --- a/libc/src/unistd/linux/readlinkat.cpp +++ b/libc/src/unistd/linux/readlinkat.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/rmdir.cpp b/libc/src/unistd/linux/rmdir.cpp index 8974468ebcf16a3..075af12af64c5c4 100644 --- a/libc/src/unistd/linux/rmdir.cpp +++ b/libc/src/unistd/linux/rmdir.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/symlink.cpp b/libc/src/unistd/linux/symlink.cpp index 5efd4df85edabde..9e1b2886ea0f5f0 100644 --- a/libc/src/unistd/linux/symlink.cpp +++ b/libc/src/unistd/linux/symlink.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/symlinkat.cpp b/libc/src/unistd/linux/symlinkat.cpp index 63d2e6d1507a573..bcf2d0f8cc0551b 100644 --- a/libc/src/unistd/linux/symlinkat.cpp +++ b/libc/src/unistd/linux/symlinkat.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. 
namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/unlink.cpp b/libc/src/unistd/linux/unlink.cpp index de7cae8b826ebc8..72d8e2398e3d761 100644 --- a/libc/src/unistd/linux/unlink.cpp +++ b/libc/src/unistd/linux/unlink.cpp @@ -11,9 +11,9 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/unistd/linux/unlinkat.cpp b/libc/src/unistd/linux/unlinkat.cpp index e794f242b9459ad..4ed20f542f17023 100644 --- a/libc/src/unistd/linux/unlinkat.cpp +++ b/libc/src/unistd/linux/unlinkat.cpp @@ -13,7 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" -#include +#include "hdr/fcntl_macros.h" #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { diff --git a/libc/test/src/fcntl/CMakeLists.txt b/libc/test/src/fcntl/CMakeLists.txt index 48048b7fe88666d..b522fef7439df74 100644 --- a/libc/test/src/fcntl/CMakeLists.txt +++ b/libc/test/src/fcntl/CMakeLists.txt @@ -42,7 +42,7 @@ add_libc_unittest( SRCS openat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.errno.errno libc.src.fcntl.open libc.src.fcntl.openat diff --git a/libc/test/src/fcntl/openat_test.cpp b/libc/test/src/fcntl/openat_test.cpp index 9dafd125224a406..547359eb9f7a9df 100644 --- a/libc/test/src/fcntl/openat_test.cpp +++ b/libc/test/src/fcntl/openat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcUniStd, OpenAndReadTest) { using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; diff --git a/libc/test/src/sys/sendfile/CMakeLists.txt b/libc/test/src/sys/sendfile/CMakeLists.txt index 82efaa147bd89d1..ceaa4accdd06ef2 100644 --- a/libc/test/src/sys/sendfile/CMakeLists.txt +++ 
b/libc/test/src/sys/sendfile/CMakeLists.txt @@ -9,7 +9,7 @@ add_libc_unittest( SRCS sendfile_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.fcntl.open diff --git a/libc/test/src/sys/sendfile/sendfile_test.cpp b/libc/test/src/sys/sendfile/sendfile_test.cpp index 59025438a24671e..a658212ddb72cdd 100644 --- a/libc/test/src/sys/sendfile/sendfile_test.cpp +++ b/libc/test/src/sys/sendfile/sendfile_test.cpp @@ -17,7 +17,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include namespace cpp = LIBC_NAMESPACE::cpp; diff --git a/libc/test/src/sys/stat/CMakeLists.txt b/libc/test/src/sys/stat/CMakeLists.txt index 877a129b627dd45..dd3d0932755b769 100644 --- a/libc/test/src/sys/stat/CMakeLists.txt +++ b/libc/test/src/sys/stat/CMakeLists.txt @@ -9,7 +9,7 @@ add_libc_unittest( SRCS chmod_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.fcntl.open @@ -25,7 +25,7 @@ add_libc_unittest( SRCS fchmodat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.fcntl.open @@ -41,7 +41,7 @@ add_libc_unittest( SRCS fchmod_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.fcntl.open @@ -57,7 +57,7 @@ add_libc_unittest( SRCS mkdirat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.sys.stat.mkdirat @@ -71,7 +71,7 @@ add_libc_unittest( SRCS stat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.sys.stat.stat @@ -87,7 +87,7 @@ add_libc_unittest( SRCS lstat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.sys.stat.lstat @@ -103,7 +103,7 @@ add_libc_unittest( SRCS fstat_test.cpp DEPENDS 
- libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_stat libc.src.errno.errno libc.src.sys.stat.fstat diff --git a/libc/test/src/sys/stat/chmod_test.cpp b/libc/test/src/sys/stat/chmod_test.cpp index c688996615ceef1..83ab0f45b6f08ae 100644 --- a/libc/test/src/sys/stat/chmod_test.cpp +++ b/libc/test/src/sys/stat/chmod_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcChmodTest, ChangeAndOpen) { diff --git a/libc/test/src/sys/stat/fchmod_test.cpp b/libc/test/src/sys/stat/fchmod_test.cpp index 91c0f68b8708c84..03eb79d95ddd6d3 100644 --- a/libc/test/src/sys/stat/fchmod_test.cpp +++ b/libc/test/src/sys/stat/fchmod_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcChmodTest, ChangeAndOpen) { diff --git a/libc/test/src/sys/stat/fchmodat_test.cpp b/libc/test/src/sys/stat/fchmodat_test.cpp index c43ef8ae13315a1..09970b6e0fb163d 100644 --- a/libc/test/src/sys/stat/fchmodat_test.cpp +++ b/libc/test/src/sys/stat/fchmodat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcFchmodatTest, ChangeAndOpen) { diff --git a/libc/test/src/sys/stat/fstat_test.cpp b/libc/test/src/sys/stat/fstat_test.cpp index 1379eae26a47aed..34c675d1a4e2992 100644 --- a/libc/test/src/sys/stat/fstat_test.cpp +++ b/libc/test/src/sys/stat/fstat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcFStatTest, CreatAndReadMode) { diff --git a/libc/test/src/sys/stat/lstat_test.cpp b/libc/test/src/sys/stat/lstat_test.cpp index b44b3d1a59ce7bf..a723d5ae2e297ba 100644 --- a/libc/test/src/sys/stat/lstat_test.cpp +++ 
b/libc/test/src/sys/stat/lstat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcLStatTest, CreatAndReadMode) { diff --git a/libc/test/src/sys/stat/mkdirat_test.cpp b/libc/test/src/sys/stat/mkdirat_test.cpp index cbacc16b402d7ab..85e013de234e76a 100644 --- a/libc/test/src/sys/stat/mkdirat_test.cpp +++ b/libc/test/src/sys/stat/mkdirat_test.cpp @@ -11,7 +11,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcMkdiratTest, CreateAndRemove) { using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; diff --git a/libc/test/src/sys/stat/stat_test.cpp b/libc/test/src/sys/stat/stat_test.cpp index baf363382022ad3..0ddd8baaec1c9b7 100644 --- a/libc/test/src/sys/stat/stat_test.cpp +++ b/libc/test/src/sys/stat/stat_test.cpp @@ -14,7 +14,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include TEST(LlvmLibcStatTest, CreatAndReadMode) { diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt index e03e56b3cf8ad71..ce936cebad4260c 100644 --- a/libc/test/src/unistd/CMakeLists.txt +++ b/libc/test/src/unistd/CMakeLists.txt @@ -24,11 +24,12 @@ add_libc_unittest( SRCS chdir_test.cpp DEPENDS + libc.hdr.fcntl_macros libc.include.unistd libc.src.errno.errno - libc.src.fcntl.open libc.src.unistd.chdir libc.src.unistd.close + libc.src.fcntl.open libc.test.UnitTest.ErrnoSetterMatcher ) @@ -223,7 +224,7 @@ add_libc_unittest( SRCS rmdir_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.src.errno.errno libc.src.sys.stat.mkdir libc.src.unistd.rmdir @@ -262,7 +263,7 @@ add_libc_unittest( SRCS readlinkat_test.cpp DEPENDS - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.unistd libc.src.errno.errno libc.src.unistd.readlinkat @@ -410,7 +411,7 @@ add_libc_unittest( 
syscall_test.cpp DEPENDS libc.include.unistd - libc.include.fcntl + libc.hdr.fcntl_macros libc.include.sys_syscall libc.src.errno.errno libc.src.unistd.__llvm_libc_syscall diff --git a/libc/test/src/unistd/chdir_test.cpp b/libc/test/src/unistd/chdir_test.cpp index 51dc7bb15d3ee6d..e1bdcd77119f733 100644 --- a/libc/test/src/unistd/chdir_test.cpp +++ b/libc/test/src/unistd/chdir_test.cpp @@ -13,7 +13,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcChdirTest, ChangeAndOpen) { // The idea of this test is that we will first open an existing test file diff --git a/libc/test/src/unistd/fchdir_test.cpp b/libc/test/src/unistd/fchdir_test.cpp index ae88e1f22ed6b76..0e39fde17c67bba 100644 --- a/libc/test/src/unistd/fchdir_test.cpp +++ b/libc/test/src/unistd/fchdir_test.cpp @@ -13,7 +13,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcChdirTest, ChangeAndOpen) { // The idea of this test is that we will first open an existing test file diff --git a/libc/test/src/unistd/readlinkat_test.cpp b/libc/test/src/unistd/readlinkat_test.cpp index 1fa683b02b5b5e6..9e4bb9af02e76a9 100644 --- a/libc/test/src/unistd/readlinkat_test.cpp +++ b/libc/test/src/unistd/readlinkat_test.cpp @@ -15,7 +15,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" namespace cpp = LIBC_NAMESPACE::cpp; diff --git a/libc/test/src/unistd/rmdir_test.cpp b/libc/test/src/unistd/rmdir_test.cpp index 93cb0f3f53c1b0b..4f4cd94c5cf0b73 100644 --- a/libc/test/src/unistd/rmdir_test.cpp +++ b/libc/test/src/unistd/rmdir_test.cpp @@ -12,7 +12,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" TEST(LlvmLibcRmdirTest, CreateAndRemove) { using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds; diff --git 
a/libc/test/src/unistd/syscall_test.cpp b/libc/test/src/unistd/syscall_test.cpp index cee29bd9afa308b..f6cc3eab9aabe87 100644 --- a/libc/test/src/unistd/syscall_test.cpp +++ b/libc/test/src/unistd/syscall_test.cpp @@ -11,7 +11,7 @@ #include "test/UnitTest/ErrnoSetterMatcher.h" #include "test/UnitTest/Test.h" -#include +#include "hdr/fcntl_macros.h" #include // For S_* flags. #include // For syscall numbers. #include From 9d9b1ba951cb2c30a192aa1157ef1c022d6d145b Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Mon, 28 Oct 2024 16:29:40 -0400 Subject: [PATCH 217/425] [ORC] Fix JITLinkRedirectableSymbolManager construction to avoid crash (#113492) Check `AnonymousPtrCreator` and `PtrJumpStubCreator` before creating the JITLinkRedirectableSymbolManager object. This simplifies construction, and avoids premature registration as a resource manager in the failure case. From 70d61f6de71bfe5ee870efc9b3e98db37273f17d Mon Sep 17 00:00:00 2001 From: Renaud Kauffmann Date: Mon, 28 Oct 2024 13:34:37 -0700 Subject: [PATCH 218/425] [flang][cuda] Adding runtime call to CUFRegisterVariable (#113952) --- flang/include/flang/Runtime/CUDA/registration.h | 5 +++++ flang/runtime/CUDA/registration.cpp | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/flang/include/flang/Runtime/CUDA/registration.h b/flang/include/flang/Runtime/CUDA/registration.h index 009715613e29f72..5237069a4c739c5 100644 --- a/flang/include/flang/Runtime/CUDA/registration.h +++ b/flang/include/flang/Runtime/CUDA/registration.h @@ -11,6 +11,7 @@ #include "flang/Runtime/entry-names.h" #include +#include namespace Fortran::runtime::cuda { @@ -23,6 +24,10 @@ void *RTDECL(CUFRegisterModule)(void *data); void RTDECL(CUFRegisterFunction)( void **module, const char *fctSym, char *fctName); +/// Register a device variable. 
+void RTDECL(CUFRegisterVariable)( + void **module, char *varSym, const char *varName, int64_t size); + } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/runtime/CUDA/registration.cpp b/flang/runtime/CUDA/registration.cpp index 20d274c4d8d1c2d..b7b6ef389bffba9 100644 --- a/flang/runtime/CUDA/registration.cpp +++ b/flang/runtime/CUDA/registration.cpp @@ -21,6 +21,9 @@ extern void __cudaRegisterFatBinaryEnd(void *); extern void __cudaRegisterFunction(void **fatCubinHandle, const char *hostFun, char *deviceFun, const char *deviceName, int thread_limit, uint3 *tid, uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize); +extern void __cudaRegisterVar(void **fatCubinHandle, char *hostVar, + const char *deviceAddress, const char *deviceName, int ext, size_t size, + int constant, int global); void *RTDECL(CUFRegisterModule)(void *data) { void **fatHandle{__cudaRegisterFatBinary(data)}; @@ -34,6 +37,11 @@ void RTDEF(CUFRegisterFunction)( (uint3 *)0, (dim3 *)0, (dim3 *)0, (int *)0); } +void RTDEF(CUFRegisterVariable)( + void **module, char *varSym, const char *varName, int64_t size) { + __cudaRegisterVar(module, varSym, varName, varName, 0, size, 0, 0); +} + } // extern "C" } // namespace Fortran::runtime::cuda From 71315698c91d0cda054b903da0594ca6f072c350 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Mon, 28 Oct 2024 20:40:52 +0000 Subject: [PATCH 219/425] [clang] Warn about memset/memcpy to NonTriviallyCopyable types (#111434) This implements a warning that's similar to what GCC does in that context: both memcpy and memset require their first and second operand to be trivially copyable, let's warn if that's not the case. 
--- clang/docs/ReleaseNotes.rst | 5 ++ .../clang/Basic/DiagnosticSemaKinds.td | 4 ++ clang/lib/Sema/SemaChecking.cpp | 18 +++++ clang/test/SemaCXX/constexpr-string.cpp | 2 + clang/test/SemaCXX/warn-memaccess.cpp | 68 +++++++++++++++++++ .../__memory/uninitialized_algorithms.h | 3 +- libcxx/test/std/utilities/expected/types.h | 6 +- libcxx/test/support/min_allocator.h | 4 +- 8 files changed, 104 insertions(+), 6 deletions(-) create mode 100644 clang/test/SemaCXX/warn-memaccess.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9515e96ffd01c1c..424f02ef08d70ea 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -322,6 +322,11 @@ Modified Compiler Flags to utilize these vector libraries. The behavior for all other vector function libraries remains unchanged. +- The ``-Wnontrivial-memaccess`` warning has been updated to also warn about + passing non-trivially-copyable destination parameter to ``memcpy``, + ``memset`` and similar functions for which it is a documented undefined + behavior. 
+ Removed Compiler Flags ------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9b9bdd7c800e37a..34ff49d7238a7f5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -795,6 +795,10 @@ def warn_cstruct_memaccess : Warning< "%1 call is a pointer to record %2 that is not trivial to " "%select{primitive-default-initialize|primitive-copy}3">, InGroup; +def warn_cxxstruct_memaccess : Warning< + "first argument in call to " + "%0 is a pointer to non-trivially copyable type %1">, + InGroup; def note_nontrivial_field : Note< "field is non-trivial to %select{copy|default-initialize}0">; def err_non_trivial_c_union_in_invalid_context : Error< diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 27b274d74ce716f..d027e4c6dfdb4dd 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -8899,18 +8899,36 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, << ArgIdx << FnName << PointeeTy << Call->getCallee()->getSourceRange()); else if (const auto *RT = PointeeTy->getAs()) { + + bool IsTriviallyCopyableCXXRecord = + RT->desugar().isTriviallyCopyableType(Context); + if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) && RT->getDecl()->isNonTrivialToPrimitiveDefaultInitialize()) { DiagRuntimeBehavior(Dest->getExprLoc(), Dest, PDiag(diag::warn_cstruct_memaccess) << ArgIdx << FnName << PointeeTy << 0); SearchNonTrivialToInitializeField::diag(PointeeTy, Dest, *this); + } else if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) && + !IsTriviallyCopyableCXXRecord && ArgIdx == 0) { + // FIXME: Limiting this warning to dest argument until we decide + // whether it's valid for source argument too. 
+ DiagRuntimeBehavior(Dest->getExprLoc(), Dest, + PDiag(diag::warn_cxxstruct_memaccess) + << FnName << PointeeTy); } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) && RT->getDecl()->isNonTrivialToPrimitiveCopy()) { DiagRuntimeBehavior(Dest->getExprLoc(), Dest, PDiag(diag::warn_cstruct_memaccess) << ArgIdx << FnName << PointeeTy << 1); SearchNonTrivialToCopyField::diag(PointeeTy, Dest, *this); + } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) && + !IsTriviallyCopyableCXXRecord && ArgIdx == 0) { + // FIXME: Limiting this warning to dest argument until we decide + // whether it's valid for source argument too. + DiagRuntimeBehavior(Dest->getExprLoc(), Dest, + PDiag(diag::warn_cxxstruct_memaccess) + << FnName << PointeeTy); } else { continue; } diff --git a/clang/test/SemaCXX/constexpr-string.cpp b/clang/test/SemaCXX/constexpr-string.cpp index c456740ef7551f7..5448365489a514d 100644 --- a/clang/test/SemaCXX/constexpr-string.cpp +++ b/clang/test/SemaCXX/constexpr-string.cpp @@ -670,6 +670,8 @@ namespace MemcpyEtc { constexpr bool test_address_of_incomplete_struct_type() { // expected-error {{never produces a constant}} struct Incomplete; extern Incomplete x, y; + // expected-warning@+2 {{first argument in call to '__builtin_memcpy' is a pointer to non-trivially copyable type 'Incomplete'}} + // expected-note@+1 {{explicitly cast the pointer to silence this warning}} __builtin_memcpy(&x, &x, 4); // expected-note@-1 2{{cannot constant evaluate 'memcpy' between objects of incomplete type 'Incomplete'}} return true; diff --git a/clang/test/SemaCXX/warn-memaccess.cpp b/clang/test/SemaCXX/warn-memaccess.cpp new file mode 100644 index 000000000000000..b4b7f6a6905b23e --- /dev/null +++ b/clang/test/SemaCXX/warn-memaccess.cpp @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 -Wnontrivial-memaccess %s + +extern "C" void *bzero(void *, unsigned); +extern "C" void *memset(void *, int, unsigned); +extern "C" void *memmove(void 
*s1, const void *s2, unsigned n); +extern "C" void *memcpy(void *s1, const void *s2, unsigned n); + +class TriviallyCopyable {}; +class NonTriviallyCopyable { NonTriviallyCopyable(const NonTriviallyCopyable&);}; + +void test_bzero(TriviallyCopyable* tc, + NonTriviallyCopyable *ntc) { + // OK + bzero(tc, sizeof(*tc)); + + // expected-warning@+2{{first argument in call to 'bzero' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} + // expected-note@+1{{explicitly cast the pointer to silence this warning}} + bzero(ntc, sizeof(*ntc)); + + // OK + bzero((void*)ntc, sizeof(*ntc)); +} + +void test_memset(TriviallyCopyable* tc, + NonTriviallyCopyable *ntc) { + // OK + memset(tc, 0, sizeof(*tc)); + + // expected-warning@+2{{first argument in call to 'memset' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} + // expected-note@+1{{explicitly cast the pointer to silence this warning}} + memset(ntc, 0, sizeof(*ntc)); + + // OK + memset((void*)ntc, 0, sizeof(*ntc)); +} + + +void test_memcpy(TriviallyCopyable* tc0, TriviallyCopyable* tc1, + NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1) { + // OK + memcpy(tc0, tc1, sizeof(*tc0)); + + // expected-warning@+2{{first argument in call to 'memcpy' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} + // expected-note@+1{{explicitly cast the pointer to silence this warning}} + memcpy(ntc0, ntc1, sizeof(*ntc0)); + + // ~ OK + memcpy((void*)ntc0, ntc1, sizeof(*ntc0)); + + // OK + memcpy((void*)ntc0, (void*)ntc1, sizeof(*ntc0)); +} + +void test_memmove(TriviallyCopyable* tc0, TriviallyCopyable* tc1, + NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1) { + // OK + memmove(tc0, tc1, sizeof(*tc0)); + + // expected-warning@+2{{first argument in call to 'memmove' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} + // expected-note@+1{{explicitly cast the pointer to silence this warning}} + memmove(ntc0, ntc1, sizeof(*ntc0)); + + // ~ OK + 
memmove((void*)ntc0, ntc1, sizeof(*ntc0)); + + // OK + memmove((void*)ntc0, (void*)ntc1, sizeof(*ntc0)); +} diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h index 54af1fa1a1cc55d..3fa948ecc43cff1 100644 --- a/libcxx/include/__memory/uninitialized_algorithms.h +++ b/libcxx/include/__memory/uninitialized_algorithms.h @@ -638,7 +638,8 @@ __uninitialized_allocator_relocate(_Alloc& __alloc, _Tp* __first, _Tp* __last, _ __guard.__complete(); std::__allocator_destroy(__alloc, __first, __last); } else { - __builtin_memcpy(__result, __first, sizeof(_Tp) * (__last - __first)); + // Casting to void* to suppress clang complaining that this is technically UB. + __builtin_memcpy(static_cast(__result), __first, sizeof(_Tp) * (__last - __first)); } } diff --git a/libcxx/test/std/utilities/expected/types.h b/libcxx/test/std/utilities/expected/types.h index 2b6983fb399c672..df73ebdfe495ee3 100644 --- a/libcxx/test/std/utilities/expected/types.h +++ b/libcxx/test/std/utilities/expected/types.h @@ -162,7 +162,7 @@ template struct TailClobberer { constexpr TailClobberer() noexcept { if (!std::is_constant_evaluated()) { - std::memset(this, Constant, sizeof(*this)); + std::memset(static_cast(this), Constant, sizeof(*this)); } // Always set `b` itself to `false` so that the comparison works. 
b = false; @@ -245,7 +245,7 @@ struct BoolWithPadding { constexpr explicit BoolWithPadding() noexcept : BoolWithPadding(false) {} constexpr BoolWithPadding(bool val) noexcept { if (!std::is_constant_evaluated()) { - std::memset(this, 0, sizeof(*this)); + std::memset(static_cast(this), 0, sizeof(*this)); } val_ = val; } @@ -268,7 +268,7 @@ struct IntWithoutPadding { constexpr explicit IntWithoutPadding() noexcept : IntWithoutPadding(0) {} constexpr IntWithoutPadding(int val) noexcept { if (!std::is_constant_evaluated()) { - std::memset(this, 0, sizeof(*this)); + std::memset(static_cast(this), 0, sizeof(*this)); } val_ = val; } diff --git a/libcxx/test/support/min_allocator.h b/libcxx/test/support/min_allocator.h index 13ee98289c36b7a..18f51f8072640d1 100644 --- a/libcxx/test/support/min_allocator.h +++ b/libcxx/test/support/min_allocator.h @@ -465,14 +465,14 @@ class safe_allocator { TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) { T* memory = std::allocator().allocate(n); if (!TEST_IS_CONSTANT_EVALUATED) - std::memset(memory, 0, sizeof(T) * n); + std::memset(static_cast(memory), 0, sizeof(T) * n); return memory; } TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) { if (!TEST_IS_CONSTANT_EVALUATED) - DoNotOptimize(std::memset(p, 0, sizeof(T) * n)); + DoNotOptimize(std::memset(static_cast(p), 0, sizeof(T) * n)); std::allocator().deallocate(p, n); } From 03dcefe08ecb68a3fedb7e9de6277df77371e9fc Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 28 Oct 2024 13:47:31 -0700 Subject: [PATCH 220/425] [libc] Fix leftover `LIBC_NAMESPACE` after porting it (#113960) Summary: There are a few of these leftover, they should all use the `LIBC_NAMESPACE_DECL` version because that implies visibility. 
--- libc/src/math/cbrt.h | 6 ++++-- libc/src/stdio/gpu/fprintf.cpp | 4 ++-- libc/src/stdio/gpu/printf.cpp | 4 ++-- libc/src/stdio/gpu/vfprintf.cpp | 4 ++-- libc/src/stdio/gpu/vfprintf_utils.h | 5 +++-- libc/src/stdio/gpu/vprintf.cpp | 4 ++-- libc/src/stdio/vsscanf.h | 6 ++++-- 7 files changed, 19 insertions(+), 14 deletions(-) diff --git a/libc/src/math/cbrt.h b/libc/src/math/cbrt.h index a7d5fe80e57b3c4..8cf7d9b221df3f9 100644 --- a/libc/src/math/cbrt.h +++ b/libc/src/math/cbrt.h @@ -9,10 +9,12 @@ #ifndef LLVM_LIBC_SRC_MATH_CBRT_H #define LLVM_LIBC_SRC_MATH_CBRT_H -namespace LIBC_NAMESPACE { +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { double cbrt(double x); -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_CBRT_H diff --git a/libc/src/stdio/gpu/fprintf.cpp b/libc/src/stdio/gpu/fprintf.cpp index 6222589cc4bab96..46196d7d2b10f55 100644 --- a/libc/src/stdio/gpu/fprintf.cpp +++ b/libc/src/stdio/gpu/fprintf.cpp @@ -16,7 +16,7 @@ #include -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, fprintf, (::FILE *__restrict stream, const char *__restrict format, @@ -29,4 +29,4 @@ LLVM_LIBC_FUNCTION(int, fprintf, return ret_val; } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/gpu/printf.cpp b/libc/src/stdio/gpu/printf.cpp index d9903193ef1658b..be1885fd6801d07 100644 --- a/libc/src/stdio/gpu/printf.cpp +++ b/libc/src/stdio/gpu/printf.cpp @@ -15,7 +15,7 @@ #include -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { va_list vlist; @@ -26,4 +26,4 @@ LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { return ret_val; } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/gpu/vfprintf.cpp b/libc/src/stdio/gpu/vfprintf.cpp index 961cfa48579e0af..c92685f48c728ba 100644 --- 
a/libc/src/stdio/gpu/vfprintf.cpp +++ b/libc/src/stdio/gpu/vfprintf.cpp @@ -14,7 +14,7 @@ #include "src/errno/libc_errno.h" #include "src/stdio/gpu/vfprintf_utils.h" -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, vfprintf, (::FILE *__restrict stream, const char *__restrict format, @@ -24,4 +24,4 @@ LLVM_LIBC_FUNCTION(int, vfprintf, return ret_val; } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/gpu/vfprintf_utils.h b/libc/src/stdio/gpu/vfprintf_utils.h index 93ce1649869fc10..5010ee16d96074a 100644 --- a/libc/src/stdio/gpu/vfprintf_utils.h +++ b/libc/src/stdio/gpu/vfprintf_utils.h @@ -9,10 +9,11 @@ #include "hdr/types/FILE.h" #include "src/__support/RPC/rpc_client.h" #include "src/__support/arg_list.h" +#include "src/__support/macros/config.h" #include "src/stdio/gpu/file.h" #include "src/string/string_utils.h" -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { template LIBC_INLINE int vfprintf_impl(::FILE *__restrict file, @@ -82,4 +83,4 @@ LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream, #endif } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/gpu/vprintf.cpp b/libc/src/stdio/gpu/vprintf.cpp index 2bb74d7f017b594..54012f3071844d5 100644 --- a/libc/src/stdio/gpu/vprintf.cpp +++ b/libc/src/stdio/gpu/vprintf.cpp @@ -13,7 +13,7 @@ #include "src/errno/libc_errno.h" #include "src/stdio/gpu/vfprintf_utils.h" -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, vprintf, (const char *__restrict format, va_list vlist)) { @@ -22,4 +22,4 @@ LLVM_LIBC_FUNCTION(int, vprintf, return ret_val; } -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdio/vsscanf.h b/libc/src/stdio/vsscanf.h index 992c44d3d95b9a7..c57b1743e477e1b 100644 --- a/libc/src/stdio/vsscanf.h +++ b/libc/src/stdio/vsscanf.h @@ -9,12 +9,14 @@ #ifndef LLVM_LIBC_SRC_STDIO_VSSCANF_H #define 
LLVM_LIBC_SRC_STDIO_VSSCANF_H +#include "src/__support/macros/config.h" + #include -namespace LIBC_NAMESPACE { +namespace LIBC_NAMESPACE_DECL { int vsscanf(const char *s, const char *format, va_list vlist); -} // namespace LIBC_NAMESPACE +} // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_STDIO_VSSCANF_H From 39303e24b6f628f3c080f1b54bd12383a55b9b3a Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Mon, 28 Oct 2024 14:00:31 -0700 Subject: [PATCH 221/425] [clang][deps] Improve timing output (#113726) This patch adds the number of executed instructions into the timing output, which provides more stable results compared to wall or process time. The format itself is also tweaked so that it's more amenable for direct import into a spreadsheet editor. --- clang/test/ClangScanDeps/print-timing.c | 3 ++- clang/tools/clang-scan-deps/ClangScanDeps.cpp | 13 +++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/clang/test/ClangScanDeps/print-timing.c b/clang/test/ClangScanDeps/print-timing.c index f27df1ebf732a9c..fa2a433b9553708 100644 --- a/clang/test/ClangScanDeps/print-timing.c +++ b/clang/test/ClangScanDeps/print-timing.c @@ -3,7 +3,8 @@ // RUN: clang-scan-deps -compilation-database %t/cdb.json -print-timing > %t/result.json 2>%t/errs // RUN: cat %t/errs | FileCheck %s -// CHECK: clang-scan-deps timing: {{[0-9]+}}.{{[0-9][0-9]}}s wall, {{[0-9]+}}.{{[0-9][0-9]}}s process +// CHECK: wall time [s] process time [s] instruction count +// CHECK-NEXT: {{[0-9]+}}.{{([0-9]{4})}} {{[0-9]+}}.{{([0-9]{4})}} {{[0-9]+}} //--- cdb.json [] diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 7d36cee7a22b391..f474b1346b1be10 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -1080,10 +1080,15 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { << NumExistsCalls << " exists() calls\n" << NumIsLocalCalls << " 
isLocal() calls\n"; - if (PrintTiming) - llvm::errs() << llvm::format( - "clang-scan-deps timing: %0.2fs wall, %0.2fs process\n", - T.getTotalTime().getWallTime(), T.getTotalTime().getProcessTime()); + if (PrintTiming) { + llvm::errs() << "wall time [s]\t" + << "process time [s]\t" + << "instruction count\n"; + const llvm::TimeRecord &R = T.getTotalTime(); + llvm::errs() << llvm::format("%0.4f", R.getWallTime()) << "\t" + << llvm::format("%0.4f", R.getProcessTime()) << "\t" + << llvm::format("%llu", R.getInstructionsExecuted()) << "\n"; + } if (RoundTripArgs) if (FD && FD->roundTripCommands(llvm::errs())) From 5ea694816b569e010854a861ad58502c056d5a39 Mon Sep 17 00:00:00 2001 From: vporpo Date: Mon, 28 Oct 2024 14:05:58 -0700 Subject: [PATCH 222/425] [SandboxVec][Legality] Check opcodes and types (#113741) --- .../Vectorize/SandboxVectorizer/VecUtils.h | 12 ++++-- .../Vectorize/SandboxVectorizer/Legality.cpp | 20 +++++++++- .../SandboxVectorizer/CMakeLists.txt | 1 + .../SandboxVectorizer/LegalityTest.cpp | 27 +++++++++++++- .../SandboxVectorizer/VecUtilsTest.cpp | 37 +++++++++++++++++++ 5 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h index 64f57edb38484ef..9577e8ef7b37cb9 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h @@ -12,7 +12,11 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H #define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H -class Utils { +#include "llvm/SandboxIR/Type.h" + +namespace llvm::sandboxir { + +class VecUtils { public: /// \Returns the number of elements in \p Ty. That is the number of lanes if a /// fixed vector or 1 if scalar. 
ScalableVectors have unknown size and @@ -25,6 +29,8 @@ class Utils { static Type *getElementType(Type *Ty) { return Ty->isVectorTy() ? cast(Ty)->getElementType() : Ty; } -} +}; + +} // namespace llvm::sandboxir -#endif LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H +#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index e4546c2f98113ee..fcfb11c669fa102 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -11,6 +11,7 @@ #include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" namespace llvm::sandboxir { @@ -26,7 +27,24 @@ void LegalityResult::dump() const { std::optional LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( ArrayRef Bndl) { - // TODO: Unimplemented. + auto *I0 = cast(Bndl[0]); + auto Opcode = I0->getOpcode(); + // If they have different opcodes, then we cannot form a vector (for now). + if (any_of(drop_begin(Bndl), [Opcode](Value *V) { + return cast(V)->getOpcode() != Opcode; + })) + return ResultReason::DiffOpcodes; + + // If not the same scalar type, Pack. This will accept scalars and vectors as + // long as the element type is the same. 
+ Type *ElmTy0 = VecUtils::getElementType(Utils::getExpectedType(I0)); + if (any_of(drop_begin(Bndl), [ElmTy0](Value *V) { + return VecUtils::getElementType(Utils::getExpectedType(V)) != ElmTy0; + })) + return ResultReason::DiffTypes; + + // TODO: Missing checks + return std::nullopt; } diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt index 24512cb0225e8ee..df689767b772457 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt @@ -13,4 +13,5 @@ add_llvm_unittest(SandboxVectorizerTests LegalityTest.cpp SchedulerTest.cpp SeedCollectorTest.cpp + VecUtilsTest.cpp ) diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 56c6bf5f1ef1f5c..51f445c8d1d0103 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -29,13 +29,17 @@ struct LegalityTest : public testing::Test { TEST_F(LegalityTest, Legality) { parseIR(C, R"IR( -define void @foo(ptr %ptr) { +define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg) { %gep0 = getelementptr float, ptr %ptr, i32 0 %gep1 = getelementptr float, ptr %ptr, i32 1 + %gep3 = getelementptr float, ptr %ptr, i32 3 %ld0 = load float, ptr %gep0 %ld1 = load float, ptr %gep0 store float %ld0, ptr %gep0 store float %ld1, ptr %gep1 + store <2 x float> %vec2, ptr %gep1 + store <3 x float> %vec3, ptr %gep3 + store i8 %arg, ptr %gep1 ret void } )IR"); @@ -46,10 +50,14 @@ define void @foo(ptr %ptr) { auto It = BB->begin(); [[maybe_unused]] auto *Gep0 = cast(&*It++); [[maybe_unused]] auto *Gep1 = cast(&*It++); + [[maybe_unused]] auto *Gep3 = cast(&*It++); [[maybe_unused]] auto *Ld0 = cast(&*It++); [[maybe_unused]] auto *Ld1 
= cast(&*It++); auto *St0 = cast(&*It++); auto *St1 = cast(&*It++); + auto *StVec2 = cast(&*It++); + auto *StVec3 = cast(&*It++); + auto *StI8 = cast(&*It++); sandboxir::LegalityAnalysis Legality; const auto &Result = Legality.canVectorize({St0, St1}); @@ -62,6 +70,23 @@ define void @foo(ptr %ptr) { EXPECT_EQ(cast(Result).getReason(), sandboxir::ResultReason::NotInstructions); } + { + // Check DiffOpcodes + const auto &Result = Legality.canVectorize({St0, Ld0}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::DiffOpcodes); + } + { + // Check DiffTypes + EXPECT_TRUE(isa(Legality.canVectorize({St0, StVec2}))); + EXPECT_TRUE(isa(Legality.canVectorize({StVec2, StVec3}))); + + const auto &Result = Legality.canVectorize({St0, StI8}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::DiffTypes); + } } #ifndef NDEBUG diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp new file mode 100644 index 000000000000000..e0b082849643925 --- /dev/null +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp @@ -0,0 +1,37 @@ +//===- VecUtilsTest.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/SandboxIR/Context.h" +#include "llvm/SandboxIR/Type.h" +#include "gtest/gtest.h" + +using namespace llvm; + +struct VecUtilsTest : public testing::Test { + LLVMContext C; +}; + +TEST_F(VecUtilsTest, GetNumElements) { + sandboxir::Context Ctx(C); + auto *ElemTy = sandboxir::Type::getInt32Ty(Ctx); + EXPECT_EQ(sandboxir::VecUtils::getNumElements(ElemTy), 1); + auto *VTy = sandboxir::FixedVectorType::get(ElemTy, 2); + EXPECT_EQ(sandboxir::VecUtils::getNumElements(VTy), 2); + auto *VTy1 = sandboxir::FixedVectorType::get(ElemTy, 1); + EXPECT_EQ(sandboxir::VecUtils::getNumElements(VTy1), 1); +} + +TEST_F(VecUtilsTest, GetElementType) { + sandboxir::Context Ctx(C); + auto *ElemTy = sandboxir::Type::getInt32Ty(Ctx); + EXPECT_EQ(sandboxir::VecUtils::getElementType(ElemTy), ElemTy); + auto *VTy = sandboxir::FixedVectorType::get(ElemTy, 2); + EXPECT_EQ(sandboxir::VecUtils::getElementType(VTy), ElemTy); +} From 09a4bcf1a549eea738bda74b2b7dc0f5c8309310 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 28 Oct 2024 16:06:22 -0500 Subject: [PATCH 223/425] [flang][OpenMP] Update handling of DEPEND clause (#113620) Parse the locator list in OmpDependClause as an OmpObjectList (instead of a list of Designators). When a common block appears in the locator list, show an informative message. Implement resolving symbols in DependSinkVec in a dedicated visitor instead of having a visitor for OmpDependClause. Resolve unresolved names of common blocks in OmpObjectList. Minor changes to the code organization: - rename OmpDependenceType to OmpTaskDependenceType (to follow 5.2 terminology), - rename Depend::WithLocators to Depend::DepType, - add comments with more detailed spec references to parse-tree.h. 
--------- Co-authored-by: Kiran Chandramohan --- flang/examples/FeatureList/FeatureList.cpp | 4 +-- .../FlangOmpReport/FlangOmpReportVisitor.cpp | 4 +-- .../FlangOmpReport/FlangOmpReportVisitor.h | 2 +- flang/include/flang/Parser/dump-parse-tree.h | 4 +-- flang/include/flang/Parser/parse-tree.h | 32 +++++++++++++------ flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 10 +++--- flang/lib/Lower/OpenMP/Clauses.cpp | 18 ++++------- flang/lib/Parser/openmp-parsers.cpp | 10 +++--- flang/lib/Parser/unparse.cpp | 6 ++-- flang/lib/Semantics/check-omp-structure.cpp | 24 ++++++++------ flang/lib/Semantics/resolve-directives.cpp | 27 ++++++++++------ flang/test/Semantics/OpenMP/depend04.f90 | 10 ++++++ llvm/include/llvm/Frontend/OpenMP/ClauseT.h | 4 +-- 13 files changed, 93 insertions(+), 62 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/depend04.f90 diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index 9fce67e61ed30fa..62f8d39a8abaa5f 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -473,8 +473,8 @@ struct NodeVisitor { READ_FEATURE(OmpDependClause::InOut) READ_FEATURE(OmpDependClause::Sink) READ_FEATURE(OmpDependClause::Source) - READ_FEATURE(OmpDependenceType) - READ_FEATURE(OmpDependenceType::Type) + READ_FEATURE(OmpTaskDependenceType) + READ_FEATURE(OmpTaskDependenceType::Type) READ_FEATURE(OmpDependSinkVec) READ_FEATURE(OmpDependSinkVecLength) READ_FEATURE(OmpEndAllocators) diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp index 5d3c5cd72eef04d..d28ed0534d60026 100644 --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp @@ -222,9 +222,9 @@ void OpenMPCounterVisitor::Post(const OmpLinearModifier::Type &c) { clauseDetails += "modifier=" + std::string{OmpLinearModifier::EnumToString(c)} + ";"; } -void 
OpenMPCounterVisitor::Post(const OmpDependenceType::Type &c) { +void OpenMPCounterVisitor::Post(const OmpTaskDependenceType::Type &c) { clauseDetails += - "type=" + std::string{OmpDependenceType::EnumToString(c)} + ";"; + "type=" + std::string{OmpTaskDependenceType::EnumToString(c)} + ";"; } void OpenMPCounterVisitor::Post(const OmpMapClause::Type &c) { clauseDetails += "type=" + std::string{OmpMapClause::EnumToString(c)} + ";"; diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h index 380534ebbfd70ac..68c52db46e2f008 100644 --- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h +++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h @@ -73,7 +73,7 @@ struct OpenMPCounterVisitor { void Post(const OmpDeviceTypeClause::Type &c); void Post(const OmpScheduleModifierType::ModType &c); void Post(const OmpLinearModifier::Type &c); - void Post(const OmpDependenceType::Type &c); + void Post(const OmpTaskDependenceType::Type &c); void Post(const OmpMapClause::Type &c); void Post(const OmpScheduleClause::ScheduleType &c); void Post(const OmpIfClause::DirectiveNameModifier &c); diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index ccdfe980f6f38c2..31ad1b7c6ce5b53 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -513,8 +513,8 @@ class ParseTreeDumper { NODE(OmpDependClause, InOut) NODE(OmpDependClause, Sink) NODE(OmpDependClause, Source) - NODE(parser, OmpDependenceType) - NODE_ENUM(OmpDependenceType, Type) + NODE(parser, OmpTaskDependenceType) + NODE_ENUM(OmpTaskDependenceType, Type) NODE(parser, OmpDependSinkVec) NODE(parser, OmpDependSinkVecLength) NODE(parser, OmpEndAllocators) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 2a312e29a3a44d1..506a470c5557b74 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ 
b/flang/include/flang/Parser/parse-tree.h @@ -3439,6 +3439,18 @@ struct OmpObject { WRAPPER_CLASS(OmpObjectList, std::list); +// Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321] +// +// task-dependence-type -> // "dependence-type" in 5.1 and before +// IN | OUT | INOUT | // since 4.5 +// SOURCE | SINK | // since 4.5, until 5.1 +// MUTEXINOUTSET | DEPOBJ | // since 5.0 +// INOUTSET // since 5.2 +struct OmpTaskDependenceType { + ENUM_CLASS(Type, In, Out, Inout, Source, Sink) + WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Type); +}; + // [5.0] 2.1.6 iterator-specifier -> type-declaration-stmt = subscript-triple // iterator-modifier -> iterator-specifier-list struct OmpIteratorSpecifier { @@ -3534,27 +3546,27 @@ struct OmpDependSinkVecLength { std::tuple t; }; -// 2.13.9 depend-vec -> iterator [+/- depend-vec-length],...,iterator[...] +// 2.13.9 depend-vec -> induction-variable [depend-vec-length], ... struct OmpDependSinkVec { TUPLE_CLASS_BOILERPLATE(OmpDependSinkVec); std::tuple> t; }; -// 2.13.9 depend-type -> IN | OUT | INOUT | SOURCE | SINK -struct OmpDependenceType { - ENUM_CLASS(Type, In, Out, Inout, Source, Sink) - WRAPPER_CLASS_BOILERPLATE(OmpDependenceType, Type); -}; - -// 2.13.9 depend-clause -> DEPEND (((IN | OUT | INOUT) : variable-name-list) | -// SOURCE | SINK : depend-vec) +// Ref: [4.5:169-170], [5.0:255-256], [5.1:288-289], [5.2:323-324] +// +// depend-clause -> +// DEPEND(SOURCE) | // since 4.5, until 5.1 +// DEPEND(SINK: depend-vec) | // since 4.5, until 5.1 +// DEPEND([depend-modifier,]dependence-type: locator-list) // since 4.5 +// +// depend-modifier -> iterator-modifier // since 5.0 struct OmpDependClause { UNION_CLASS_BOILERPLATE(OmpDependClause); EMPTY_CLASS(Source); WRAPPER_CLASS(Sink, std::list); struct InOut { TUPLE_CLASS_BOILERPLATE(InOut); - std::tuple> t; + std::tuple t; }; std::variant u; }; diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 
fbc031f3a93d7d7..8fb0dd4a1ec3a70 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -798,11 +798,11 @@ bool ClauseProcessor::processDepend(mlir::omp::DependClauseOps &result) const { return findRepeatableClause( [&](const omp::clause::Depend &clause, const parser::CharBlock &) { using Depend = omp::clause::Depend; - assert(std::holds_alternative(clause.u) && - "Only the modern form is handled at the moment"); - auto &modern = std::get(clause.u); - auto kind = std::get(modern.t); - auto &objects = std::get(modern.t); + assert(std::holds_alternative(clause.u) && + "Only the form with dependence type is handled at the moment"); + auto &depType = std::get(clause.u); + auto kind = std::get(depType.t); + auto &objects = std::get(depType.t); mlir::omp::ClauseTaskDependAttr dependTypeOperand = genDependKindAttr(firOpBuilder, kind); diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 3bd89b543288634..b1fa52751fbd7b0 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -555,7 +555,7 @@ Depend make(const parser::OmpClause::Depend &inp, using Iteration = Doacross::Vector::value_type; // LoopIterationT CLAUSET_ENUM_CONVERT( // - convert1, parser::OmpDependenceType::Type, Depend::TaskDependenceType, + convert1, parser::OmpTaskDependenceType::Type, Depend::TaskDependenceType, // clang-format off MS(In, In) MS(Out, Out) @@ -593,17 +593,13 @@ Depend make(const parser::OmpClause::Depend &inp, return Doacross{{/*DependenceType=*/Doacross::DependenceType::Sink, /*Vector=*/makeList(s.v, convert2)}}; }, - // Depend::WithLocators + // Depend::DepType [&](const wrapped::InOut &s) -> Variant { - auto &t0 = std::get(s.t); - auto &t1 = std::get>(s.t); - auto convert4 = [&](const parser::Designator &t) { - return makeObject(t, semaCtx); - }; - return Depend::WithLocators{ - {/*TaskDependenceType=*/convert1(t0.v), - /*Iterator=*/std::nullopt, - 
/*LocatorList=*/makeList(t1, convert4)}}; + auto &t0 = std::get(s.t); + auto &t1 = std::get(s.t); + return Depend::DepType{{/*TaskDependenceType=*/convert1(t0.v), + /*Iterator=*/std::nullopt, + /*LocatorList=*/makeObjects(t1, semaCtx)}}; }, }, inp.v.u)}; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index ae0c351fed56d1d..3ca4e93a6c9b933 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -365,10 +365,10 @@ TYPE_PARSER(construct( TYPE_PARSER( construct(name, maybe(Parser{}))) -TYPE_PARSER( - construct("IN"_id >> pure(OmpDependenceType::Type::In) || - "INOUT" >> pure(OmpDependenceType::Type::Inout) || - "OUT" >> pure(OmpDependenceType::Type::Out))) +TYPE_PARSER(construct( + "IN"_id >> pure(OmpTaskDependenceType::Type::In) || + "INOUT" >> pure(OmpTaskDependenceType::Type::Inout) || + "OUT" >> pure(OmpTaskDependenceType::Type::Out))) TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US, construct(construct( @@ -376,7 +376,7 @@ TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US, construct( construct("SOURCE"_tok)) || construct(construct( - Parser{}, ":" >> nonemptyList(designator)))) + Parser{}, ":" >> Parser{}))) // 2.15.3.7 LINEAR (linear-list: linear-step) // linear-list -> list | modifier(list) diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ba4155469073e6a..39fcb61609e33b3 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2216,9 +2216,9 @@ class UnparseVisitor { } void Unparse(const OmpDependClause::InOut &x) { Put("("); - Walk(std::get(x.t)); + Walk(std::get(x.t)); Put(":"); - Walk(std::get>(x.t), ","); + Walk(std::get(x.t)); Put(")"); } bool Pre(const OmpDependClause &x) { @@ -2829,7 +2829,7 @@ class UnparseVisitor { OmpLastprivateClause, LastprivateModifier) // OMP lastprivate-modifier WALK_NESTED_ENUM(OmpScheduleModifierType, ModType) // OMP schedule-modifier WALK_NESTED_ENUM(OmpLinearModifier, Type) // OMP linear-modifier - 
WALK_NESTED_ENUM(OmpDependenceType, Type) // OMP dependence-type + WALK_NESTED_ENUM(OmpTaskDependenceType, Type) // OMP task-dependence-type WALK_NESTED_ENUM(OmpScheduleClause, ScheduleType) // OMP schedule-type WALK_NESTED_ENUM(OmpDeviceClause, DeviceModifier) // OMP device modifier WALK_NESTED_ENUM(OmpDeviceTypeClause, Type) // OMP DEVICE_TYPE diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 455322d610d6c28..599cc61a83bf0ae 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3288,15 +3288,21 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Depend &x) { parser::ToUpperCaseLetters(getDirectiveName(GetContext().directive))); } if (const auto *inOut{std::get_if(&x.v.u)}) { - const auto &designators{std::get>(inOut->t)}; - for (const auto &ele : designators) { - if (const auto *dataRef{std::get_if(&ele.u)}) { - CheckDependList(*dataRef); - if (const auto *arr{ - std::get_if>( - &dataRef->u)}) { - CheckArraySection(arr->value(), GetLastName(*dataRef), - llvm::omp::Clause::OMPC_depend); + for (const auto &object : std::get(inOut->t).v) { + if (const auto *name{std::get_if(&object.u)}) { + context_.Say(GetContext().clauseSource, + "Common block name ('%s') cannot appear in a DEPEND " + "clause"_err_en_US, + name->ToString()); + } else if (auto *designator{std::get_if(&object.u)}) { + if (auto *dataRef{std::get_if(&designator->u)}) { + CheckDependList(*dataRef); + if (const auto *arr{ + std::get_if>( + &dataRef->u)}) { + CheckArraySection(arr->value(), GetLastName(*dataRef), + llvm::omp::Clause::OMPC_depend); + } } } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 979570a7d4103a5..014b7987a658bd3 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -435,6 +435,20 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { bool 
Pre(const parser::OpenMPAllocatorsConstruct &); void Post(const parser::OpenMPAllocatorsConstruct &); + void Post(const parser::OmpObjectList &x) { + // The objects from OMP clauses should have already been resolved, + // except common blocks (the ResolveNamesVisitor does not visit + // parser::Name, those are dealt with as members of other structures). + // Iterate over elements of x, and resolve any common blocks that + // are still unresolved. + for (const parser::OmpObject &obj : x.v) { + auto *name{std::get_if(&obj.u)}; + if (name && !name->symbol) { + Resolve(*name, currScope().MakeCommonBlock(name->source)); + } + } + } + // 2.15.3 Data-Sharing Attribute Clauses void Post(const parser::OmpDefaultClause &); bool Pre(const parser::OmpClause::Shared &x) { @@ -531,16 +545,9 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { return false; } - bool Pre(const parser::OmpDependClause &x) { - if (const auto *dependSink{ - std::get_if(&x.u)}) { - const auto &dependSinkVec{dependSink->v}; - for (const auto &dependSinkElement : dependSinkVec) { - const auto &name{std::get(dependSinkElement.t)}; - ResolveName(&name); - } - } - return false; + void Post(const parser::OmpDependSinkVec &x) { + const auto &name{std::get(x.t)}; + ResolveName(&name); } bool Pre(const parser::OmpClause::UseDevicePtr &x) { diff --git a/flang/test/Semantics/OpenMP/depend04.f90 b/flang/test/Semantics/OpenMP/depend04.f90 new file mode 100644 index 000000000000000..8bdddb017d2c9d9 --- /dev/null +++ b/flang/test/Semantics/OpenMP/depend04.f90 @@ -0,0 +1,10 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50 + +subroutine f00 + integer :: x + common /cc/ x +!ERROR: Common block name ('cc') cannot appear in a DEPEND clause + !$omp task depend(in: /cc/) + x = 0 + !$omp end task +end diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index ac34ddafc5e726e..2a890905dc6323e 100644 --- 
a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -503,7 +503,7 @@ struct DependT { using LocatorList = ObjectListT; using TaskDependenceType = tomp::type::TaskDependenceType; - struct WithLocators { // Modern form + struct DepType { // The form with task dependence type. using TupleTrait = std::true_type; // Empty LocatorList means "omp_all_memory". std::tuple t; @@ -511,7 +511,7 @@ struct DependT { using Doacross = DoacrossT; using UnionTrait = std::true_type; - std::variant u; // Doacross form is legacy + std::variant u; // Doacross form is legacy }; // V5.2: [3.5] `destroy` clause From c5edecbb4bfe08997819ff84712e3e22ddd04490 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 28 Oct 2024 21:36:48 +0000 Subject: [PATCH 224/425] [X86] Regenerate scmp/ucmp test checks with vpternlog comments --- llvm/test/CodeGen/X86/scmp.ll | 2 +- llvm/test/CodeGen/X86/ucmp.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll index 0746a07d2cdf268..5ae5caf3e88b200 100644 --- a/llvm/test/CodeGen/X86/scmp.ll +++ b/llvm/test/CodeGen/X86/scmp.ll @@ -848,7 +848,7 @@ define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k1 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k2 ; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 ; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/ucmp.ll b/llvm/test/CodeGen/X86/ucmp.ll index cd643cb8d637519..6a52acfe2fb3059 100644 --- a/llvm/test/CodeGen/X86/ucmp.ll +++ b/llvm/test/CodeGen/X86/ucmp.ll @@ -819,7 +819,7 @@ define <16 x i32> @ucmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512-NEXT: vpcmpltub %xmm1, %xmm0, %k1 ; AVX512-NEXT: vpcmpnleub %xmm1, %xmm0, %k2 ; 
AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 +; AVX512-NEXT: vpternlogd {{.*#+}} zmm1 = -1 ; AVX512-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} ; AVX512-NEXT: retq ; From 7b663bd9179a205d5a65a34e447fbeffcb43c194 Mon Sep 17 00:00:00 2001 From: Job Henandez Lara Date: Mon, 28 Oct 2024 14:49:36 -0700 Subject: [PATCH 225/425] [libc] Fix the remaining fcntl.h proxy header includes. (#113961) --- libc/src/stdio/linux/CMakeLists.txt | 1 + libc/src/stdio/linux/rename.cpp | 2 +- libc/src/sys/mman/linux/CMakeLists.txt | 3 +-- libc/src/sys/mman/linux/shm_open.cpp | 2 +- libc/src/sys/mman/shm_open.h | 2 +- libc/src/sys/stat/linux/CMakeLists.txt | 2 ++ libc/src/sys/stat/linux/chmod.cpp | 1 + libc/src/sys/stat/linux/mkdir.cpp | 1 + libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp | 2 +- 9 files changed, 10 insertions(+), 6 deletions(-) diff --git a/libc/src/stdio/linux/CMakeLists.txt b/libc/src/stdio/linux/CMakeLists.txt index e81642dc6f01e6f..1b2fcb33ce54d7f 100644 --- a/libc/src/stdio/linux/CMakeLists.txt +++ b/libc/src/stdio/linux/CMakeLists.txt @@ -22,6 +22,7 @@ add_entrypoint_object( libc.include.sys_syscall libc.src.__support.OSUtil.osutil libc.src.errno.errno + libc.hdr.fcntl_macros ) add_entrypoint_object( diff --git a/libc/src/stdio/linux/rename.cpp b/libc/src/stdio/linux/rename.cpp index 69fd22720ed1957..fbcb29be48f4e24 100644 --- a/libc/src/stdio/linux/rename.cpp +++ b/libc/src/stdio/linux/rename.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/stdio/rename.h" -#include "include/llvm-libc-macros/linux/fcntl-macros.h" +#include "hdr/fcntl_macros.h" #include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
#include "src/__support/common.h" #include "src/__support/macros/config.h" diff --git a/libc/src/sys/mman/linux/CMakeLists.txt b/libc/src/sys/mman/linux/CMakeLists.txt index 11188254cfbd459..47c16f79bc8d580 100644 --- a/libc/src/sys/mman/linux/CMakeLists.txt +++ b/libc/src/sys/mman/linux/CMakeLists.txt @@ -187,8 +187,7 @@ add_entrypoint_object( ../shm_open.h DEPENDS libc.src.fcntl.open - libc.include.llvm-libc-macros.fcntl_macros - libc.include.llvm-libc-types.mode_t + libc.hdr.types.mode_t .shm_common ) diff --git a/libc/src/sys/mman/linux/shm_open.cpp b/libc/src/sys/mman/linux/shm_open.cpp index d235e57aefdeb13..11de482272d00a6 100644 --- a/libc/src/sys/mman/linux/shm_open.cpp +++ b/libc/src/sys/mman/linux/shm_open.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/sys/mman/shm_open.h" -#include "llvm-libc-macros/fcntl-macros.h" +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/fcntl/open.h" #include "src/sys/mman/linux/shm_common.h" diff --git a/libc/src/sys/mman/shm_open.h b/libc/src/sys/mman/shm_open.h index c890304aa4acf9b..1872dd30cb6f5e3 100644 --- a/libc/src/sys/mman/shm_open.h +++ b/libc/src/sys/mman/shm_open.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_SYS_MMAN_SHM_OPEN_H #define LLVM_LIBC_SRC_SYS_MMAN_SHM_OPEN_H +#include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" -#include namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sys/stat/linux/CMakeLists.txt b/libc/src/sys/stat/linux/CMakeLists.txt index 7c9496b6b6e8c6d..9aeb14636c2c1a5 100644 --- a/libc/src/sys/stat/linux/CMakeLists.txt +++ b/libc/src/sys/stat/linux/CMakeLists.txt @@ -6,6 +6,7 @@ add_entrypoint_object( ../chmod.h DEPENDS libc.hdr.types.mode_t + libc.hdr.fcntl_macros libc.include.sys_stat libc.include.sys_syscall libc.src.__support.OSUtil.osutil @@ -47,6 +48,7 @@ add_entrypoint_object( ../mkdir.h DEPENDS libc.hdr.types.mode_t + libc.hdr.fcntl_macros libc.include.sys_stat 
libc.include.sys_syscall libc.src.__support.OSUtil.osutil diff --git a/libc/src/sys/stat/linux/chmod.cpp b/libc/src/sys/stat/linux/chmod.cpp index 9d2860391818109..57d5bae6b81915c 100644 --- a/libc/src/sys/stat/linux/chmod.cpp +++ b/libc/src/sys/stat/linux/chmod.cpp @@ -11,6 +11,7 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" diff --git a/libc/src/sys/stat/linux/mkdir.cpp b/libc/src/sys/stat/linux/mkdir.cpp index bd6efef858c7b6c..b319b5c8393de75 100644 --- a/libc/src/sys/stat/linux/mkdir.cpp +++ b/libc/src/sys/stat/linux/mkdir.cpp @@ -11,6 +11,7 @@ #include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" +#include "hdr/fcntl_macros.h" #include "hdr/types/mode_t.h" #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" diff --git a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp index 8cb5f867453e45e..2f3e0b96ff09574 100644 --- a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp +++ b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp @@ -1,4 +1,4 @@ -#include "llvm-libc-macros/linux/fcntl-macros.h" +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/fcntl/open.h" #include "src/sys/statvfs/fstatvfs.h" From 82cb22e735be24acf5aac594e519935fc43f0aec Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Mon, 28 Oct 2024 21:50:23 +0000 Subject: [PATCH 226/425] [MLIR] Add missing MLIRLLVMDialect dep to MLIRMathToLibm (#113563) This fixes the following failure when doing a clean build (in particular no .ninja* lying around) of lib/libMLIRMathToLibm.a only: ``` In file included from llvm/include/llvm/IR/Module.h:22, from mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h:37, from 
mlir/lib/Conversion/MathToLibm/MathToLibm.cpp:13 llvm/include/llvm/IR/Attributes.h:90:14: fatal error: llvm/IR/Attributes.inc: No such file or directory ``` --- mlir/lib/Conversion/MathToLibm/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt index 61c46e9bfe250e5..0a4eb97474f3ab9 100644 --- a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt +++ b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt @@ -14,6 +14,7 @@ add_mlir_conversion_library(MLIRMathToLibm MLIRArithDialect MLIRDialectUtils MLIRFuncDialect + MLIRLLVMDialect MLIRMathDialect MLIRPass MLIRTransformUtils From 7db4cacfd72b64de8460509d07a22b9142df85f6 Mon Sep 17 00:00:00 2001 From: Thomas Preud'homme Date: Mon, 28 Oct 2024 21:53:39 +0000 Subject: [PATCH 227/425] [MLIR] Add missing MLIRLLVMDialect dep to MLIRLinalgToStandard (#113561) This fixes the following failure when doing a clean build (in particular no .ninja* lying around) of lib/libMLIRLinalgToStandard.a only: ``` In file included from llvm/include/llvm/IR/Module.h:22, from mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h:37, from mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp:13: llvm/include/llvm/IR/Attributes.h:90:14: fatal error: llvm/IR/Attributes.inc: No such file or directory ``` --- mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt index cbe85789b29a371..7fc4af540318552 100644 --- a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt @@ -15,6 +15,7 @@ add_mlir_conversion_library(MLIRLinalgToStandard MLIRIR MLIRLinalgDialect MLIRLinalgTransforms + MLIRLLVMDialect MLIRMemRefDialect MLIRPass MLIRSCFDialect From 474234a09655e57b7a4270150f0926db77e864b4 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Mon, 28 
Oct 2024 19:02:07 -0300 Subject: [PATCH 228/425] [debugserver] Mark ASAN memory regions as "heap" (#113968) This memory type is currently not handled, but it makes sense to mark it as a heap allocation in requests asking for memory region info. --- lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp index 60d4c3bc293a3c6..97908b4acaf284f 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp +++ b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp @@ -208,7 +208,8 @@ std::vector MachVMRegion::GetMemoryTypes() const { m_data.user_tag == VM_MEMORY_MALLOC_LARGE_REUSABLE || m_data.user_tag == VM_MEMORY_MALLOC_HUGE || m_data.user_tag == VM_MEMORY_REALLOC || - m_data.user_tag == VM_MEMORY_SBRK) { + m_data.user_tag == VM_MEMORY_SBRK || + m_data.user_tag == VM_MEMORY_SANITIZER) { types.push_back("heap"); if (m_data.user_tag == VM_MEMORY_MALLOC_TINY) { types.push_back("malloc-tiny"); From 9f69da35e2e5438d0c042f76277fff397f6a1505 Mon Sep 17 00:00:00 2001 From: Prabhuk Date: Mon, 28 Oct 2024 15:24:47 -0700 Subject: [PATCH 229/425] [NFC][compiler-rt] Add missing header include (#113951) Include `cstdlib` which was originally included transitively but the changes to `vector` in libcpp breaks new builds due to missing cstdlib header for `abort()` function call. 
--- compiler-rt/include/fuzzer/FuzzedDataProvider.h | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/include/fuzzer/FuzzedDataProvider.h b/compiler-rt/include/fuzzer/FuzzedDataProvider.h index 5903ed837917ca9..e57b95b6304a9a0 100644 --- a/compiler-rt/include/fuzzer/FuzzedDataProvider.h +++ b/compiler-rt/include/fuzzer/FuzzedDataProvider.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include From bf4b31ad54f6bf298210c201a0afe50469ebe98e Mon Sep 17 00:00:00 2001 From: vporpo Date: Mon, 28 Oct 2024 15:32:20 -0700 Subject: [PATCH 230/425] [SandboxVec][Legality] Check Fastmath flags (#113967) --- .../Vectorize/SandboxVectorizer/Legality.h | 3 +++ .../Vectorize/SandboxVectorizer/Legality.cpp | 12 ++++++++++++ .../Vectorize/SandboxVectorizer/LegalityTest.cpp | 16 +++++++++++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index d4b0b54375b0267..49dcec26dbc5599 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -31,6 +31,7 @@ enum class ResultReason { NotInstructions, DiffOpcodes, DiffTypes, + DiffMathFlags, }; #ifndef NDEBUG @@ -53,6 +54,8 @@ struct ToStr { return "DiffOpcodes"; case ResultReason::DiffTypes: return "DiffTypes"; + case ResultReason::DiffMathFlags: + return "DiffMathFlags"; } llvm_unreachable("Unknown ResultReason enum"); } diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index fcfb11c669fa102..346d8a90589f555 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -8,6 +8,7 @@ #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h" #include "llvm/SandboxIR/Instruction.h" 
+#include "llvm/SandboxIR/Operator.h" #include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/Debug.h" @@ -43,6 +44,17 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( })) return ResultReason::DiffTypes; + // TODO: Allow vectorization of instrs with different flags as long as we + // change them to the least common one. + // For now pack if differnt FastMathFlags. + if (isa(I0)) { + FastMathFlags FMF0 = cast(Bndl[0])->getFastMathFlags(); + if (any_of(drop_begin(Bndl), [FMF0](auto *V) { + return cast(V)->getFastMathFlags() != FMF0; + })) + return ResultReason::DiffMathFlags; + } + // TODO: Missing checks return std::nullopt; diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index 51f445c8d1d0103..aaa8e96de6d171c 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -29,7 +29,7 @@ struct LegalityTest : public testing::Test { TEST_F(LegalityTest, Legality) { parseIR(C, R"IR( -define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg) { +define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float %farg0, float %farg1) { %gep0 = getelementptr float, ptr %ptr, i32 0 %gep1 = getelementptr float, ptr %ptr, i32 1 %gep3 = getelementptr float, ptr %ptr, i32 3 @@ -40,6 +40,8 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg) { store <2 x float> %vec2, ptr %gep1 store <3 x float> %vec3, ptr %gep3 store i8 %arg, ptr %gep1 + %fadd0 = fadd float %farg0, %farg0 + %fadd1 = fadd fast float %farg1, %farg1 ret void } )IR"); @@ -58,6 +60,8 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg) { auto *StVec2 = cast(&*It++); auto *StVec3 = cast(&*It++); auto *StI8 = cast(&*It++); + auto *FAdd0 = cast(&*It++); + auto *FAdd1 = cast(&*It++); 
sandboxir::LegalityAnalysis Legality; const auto &Result = Legality.canVectorize({St0, St1}); @@ -87,6 +91,13 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg) { EXPECT_EQ(cast(Result).getReason(), sandboxir::ResultReason::DiffTypes); } + { + // Check DiffMathFlags + const auto &Result = Legality.canVectorize({FAdd0, FAdd1}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::DiffMathFlags); + } } #ifndef NDEBUG @@ -110,5 +121,8 @@ TEST_F(LegalityTest, LegalityResultDump) { EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffTypes), "Pack Reason: DiffTypes")); + EXPECT_TRUE(Matches(Legality.createLegalityResult( + sandboxir::ResultReason::DiffMathFlags), + "Pack Reason: DiffMathFlags")); } #endif // NDEBUG From e873b415a2bfbbc2e5e2e70d77b61b7883cbf949 Mon Sep 17 00:00:00 2001 From: Job Henandez Lara Date: Mon, 28 Oct 2024 15:32:45 -0700 Subject: [PATCH 231/425] [libc] add the rest of the hdr/fcntl_macro.h headers (#113972) --- libc/src/fcntl/linux/CMakeLists.txt | 1 + libc/src/fcntl/linux/open.cpp | 1 + libc/src/spawn/linux/CMakeLists.txt | 1 + libc/src/spawn/linux/posix_spawn.cpp | 1 + 4 files changed, 4 insertions(+) diff --git a/libc/src/fcntl/linux/CMakeLists.txt b/libc/src/fcntl/linux/CMakeLists.txt index ecfb2cdd3f3361c..580db16cd413205 100644 --- a/libc/src/fcntl/linux/CMakeLists.txt +++ b/libc/src/fcntl/linux/CMakeLists.txt @@ -29,6 +29,7 @@ add_entrypoint_object( ../open.h DEPENDS libc.hdr.types.mode_t + libc.hdr.fcntl_macros libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/fcntl/linux/open.cpp b/libc/src/fcntl/linux/open.cpp index 9f3d035388e8652..8b699ecdd2043c7 100644 --- a/libc/src/fcntl/linux/open.cpp +++ b/libc/src/fcntl/linux/open.cpp @@ -13,6 +13,7 @@ #include "src/__support/macros/config.h" #include "src/errno/libc_errno.h" +#include "hdr/fcntl_macros.h" #include "hdr/types/mode_t.h" #include #include // For syscall 
numbers. diff --git a/libc/src/spawn/linux/CMakeLists.txt b/libc/src/spawn/linux/CMakeLists.txt index 5f7ab4c43165dcc..26148fe1c76dbdc 100644 --- a/libc/src/spawn/linux/CMakeLists.txt +++ b/libc/src/spawn/linux/CMakeLists.txt @@ -6,6 +6,7 @@ add_entrypoint_object( ../posix_spawn.h DEPENDS libc.hdr.types.mode_t + libc.hdr.fcntl_macros libc.include.spawn libc.include.sys_syscall libc.include.signal diff --git a/libc/src/spawn/linux/posix_spawn.cpp b/libc/src/spawn/linux/posix_spawn.cpp index d6caf8b374a02bc..fe82ba260148a61 100644 --- a/libc/src/spawn/linux/posix_spawn.cpp +++ b/libc/src/spawn/linux/posix_spawn.cpp @@ -14,6 +14,7 @@ #include "src/__support/macros/config.h" #include "src/spawn/file_actions.h" +#include "hdr/fcntl_macros.h" #include "hdr/types/mode_t.h" #include // For SIGCHLD #include From 0eb5c9d2ef8d932eef84d4db8aef3dd512f80277 Mon Sep 17 00:00:00 2001 From: Renaud Kauffmann Date: Mon, 28 Oct 2024 15:34:27 -0700 Subject: [PATCH 232/425] [flang][cuda] Copying device globals in the gpu module (#113955) --- .../Optimizer/Transforms/CUFDeviceGlobal.cpp | 27 +++++++++++++++++++ flang/test/Fir/CUDA/cuda-device-global.f90 | 13 +++++++++ 2 files changed, 40 insertions(+) create mode 100644 flang/test/Fir/CUDA/cuda-device-global.f90 diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp index a4761f24f16d7be..dc39be8574f8448 100644 --- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp +++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp @@ -11,6 +11,7 @@ #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/allocatable.h" #include "mlir/IR/SymbolTable.h" @@ -58,6 +59,32 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase { prepareImplicitDeviceGlobals(funcOp, symTable); return 
mlir::WalkResult::advance(); }); + + // Copying the device global variable into the gpu module + mlir::SymbolTable parentSymTable(mod); + auto gpuMod = + parentSymTable.lookup(cudaDeviceModuleName); + if (gpuMod) { + mlir::SymbolTable gpuSymTable(gpuMod); + for (auto globalOp : mod.getOps()) { + auto attr = globalOp.getDataAttrAttr(); + if (!attr) + continue; + switch (attr.getValue()) { + case cuf::DataAttribute::Device: + case cuf::DataAttribute::Constant: + case cuf::DataAttribute::Managed: { + auto globalName{globalOp.getSymbol().getValue()}; + if (gpuSymTable.lookup(globalName)) { + break; + } + gpuSymTable.insert(globalOp->clone()); + } break; + default: + break; + } + } + } } }; } // namespace diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90 new file mode 100644 index 000000000000000..c83a938d5af2141 --- /dev/null +++ b/flang/test/Fir/CUDA/cuda-device-global.f90 @@ -0,0 +1,13 @@ + +// RUN: fir-opt --split-input-file --cuf-device-global %s | FileCheck %s + + +module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} { + fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda} : !fir.array<5xi32> + + gpu.module @cuda_device_mod [#nvvm.target] { + } +} + +// CHECK: gpu.module @cuda_device_mod [#nvvm.target] +// CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda} : !fir.array<5xi32> From 36c119490630846c1fa0f427cc60837fd7b40a28 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Mon, 28 Oct 2024 15:34:56 -0700 Subject: [PATCH 233/425] Remove optimization flags from clang codegen tests (#113714) - Remove an -O3 flag from a couple of clang x86 codegen tests so the tests do not need to be updated when optimizations in LLVM change. 
- Change the tests to use utils/update_cc_test_checks.sh - Change from apple/darwin triples to generic x86_64-- and i386-- because it was not relevant to the test but `update_cc_test_checks` seems to be unable to handle platforms that prepend `_` to function names. --- clang/test/CodeGen/X86/avx-cmp-builtins.c | 104 ++++-- clang/test/CodeGen/X86/avx-shuffle-builtins.c | 302 +++++++++++++----- clang/test/CodeGen/X86/sse.c | 58 +++- 3 files changed, 348 insertions(+), 116 deletions(-) diff --git a/clang/test/CodeGen/X86/avx-cmp-builtins.c b/clang/test/CodeGen/X86/avx-cmp-builtins.c index c4e3c7ccd54988d..2e4a383a6b3fca1 100644 --- a/clang/test/CodeGen/X86/avx-cmp-builtins.c +++ b/clang/test/CodeGen/X86/avx-cmp-builtins.c @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -O3 -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s -// FIXME: The shufflevector instructions in test_cmpgt_sd are relying on O3 here. 
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s #include @@ -9,62 +9,124 @@ // Test LLVM IR codegen of cmpXY instructions // +// CHECK-LABEL: define dso_local <2 x double> @test_cmp_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A]], <2 x double> [[B]], i8 13) +// CHECK-NEXT: ret <2 x double> [[TMP0]] +// __m128d test_cmp_sd(__m128d a, __m128d b) { // Expects that the third argument in LLVM IR is immediate expression - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 13) return _mm_cmp_sd(a, b, _CMP_GE_OS); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmp_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A]], <4 x float> [[B]], i8 13) +// CHECK-NEXT: ret <4 x float> [[TMP0]] +// __m128 test_cmp_ss(__m128 a, __m128 b) { // Expects that the third argument in LLVM IR is immediate expression - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 13) return _mm_cmp_ss(a, b, _CMP_GE_OS); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpgt_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 1) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], 
<4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpgt_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpgt_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpge_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 2) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpge_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpge_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpngt_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 5) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpngt_ss(__m128 a, __m128 b) { - // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpngt_ss(a, b); } +// CHECK-LABEL: define dso_local <4 x float> @test_cmpnge_ss( +// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 6) +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> +// CHECK-NEXT: ret <4 x float> [[SHUFFLE_I]] +// __m128 test_cmpnge_ss(__m128 a, __m128 b) { - // CHECK: 
@llvm.x86.sse.cmp.ss({{.*}}, i8 6) - // CHECK: shufflevector <{{.*}}, <4 x i32> return _mm_cmpnge_ss(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpgt_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 1) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpgt_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 1) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpgt_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpge_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 2) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpge_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 2) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpge_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpngt_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x 
double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 5) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpngt_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 5) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpngt_sd(a, b); } +// CHECK-LABEL: define dso_local <2 x double> @test_cmpnge_sd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 6) +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0 +// CHECK-NEXT: [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1 +// CHECK-NEXT: ret <2 x double> [[VECINIT2_I]] +// __m128d test_cmpnge_sd(__m128d a, __m128d b) { - // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 6) - // CHECK: shufflevector <{{.*}}, <2 x i32> return _mm_cmpnge_sd(a, b); } diff --git a/clang/test/CodeGen/X86/avx-shuffle-builtins.c b/clang/test/CodeGen/X86/avx-shuffle-builtins.c index d184d28f3e07aa9..1c05fa436983ed7 100644 --- a/clang/test/CodeGen/X86/avx-shuffle-builtins.c +++ b/clang/test/CodeGen/X86/avx-shuffle-builtins.c @@ -1,7 +1,7 @@ +// NOTE: Assertions have been autogenerated by 
utils/update_cc_test_checks.py UTC_ARGS: --version 5 // REQUIRES: x86-registered-target -// RUN: %clang_cc1 -ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64 -// RUN: %clang_cc1 -ffreestanding %s -O3 -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86 -// FIXME: This is testing optimized generation of shuffle instructions and should be fixed. +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s #include @@ -10,201 +10,341 @@ // Test LLVM IR codegen of shuffle instructions, checking if the masks are correct // +// CHECK-LABEL: define dso_local <8 x float> @x( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFP:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFP]] +// __m256 x(__m256 a, __m256 b) { - // CHECK-LABEL: x - // CHECK: shufflevector{{.*}} return _mm256_shuffle_ps(a, b, 203); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm_permute_pd( +// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[PERMIL]] +// __m128d test_mm_permute_pd(__m128d a) { - // CHECK-LABEL: test_mm_permute_pd - // CHECK: shufflevector{{.*}} return _mm_permute_pd(a, 1); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute_pd( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 
x double> [[A]], <4 x double> poison, <4 x i32> +// CHECK-NEXT: ret <4 x double> [[PERMIL]] +// __m256d test_mm256_permute_pd(__m256d a) { - // CHECK-LABEL: test_mm256_permute_pd - // CHECK: shufflevector{{.*}} return _mm256_permute_pd(a, 5); } +// CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps( +// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[PERMIL]] +// __m128 test_mm_permute_ps(__m128 a) { - // CHECK-LABEL: test_mm_permute_ps - // CHECK: shufflevector{{.*}} return _mm_permute_ps(a, 0x1b); } -// Test case for PR12401 +// CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps2( +// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[PERMIL]] +// __m128 test_mm_permute_ps2(__m128 a) { - // CHECK-LABEL: test_mm_permute_ps2 - // CHECK: shufflevector{{.*}} return _mm_permute_ps(a, 0xe6); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute_ps( +// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[PERMIL:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> +// CHECK-NEXT: ret <8 x float> [[PERMIL]] +// __m256 test_mm256_permute_ps(__m256 a) { - // CHECK-LABEL: test_mm256_permute_ps - // CHECK: shufflevector{{.*}} return _mm256_permute_ps(a, 0x1b); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute2f128_pd( +// CHECK-SAME: <4 x double> noundef [[A:%.*]], <4 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[VPERM]] +// __m256d test_mm256_permute2f128_pd(__m256d a, 
__m256d b) { - // CHECK-LABEL: test_mm256_permute2f128_pd - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_pd(a, b, 0x31); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute2f128_ps( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[VPERM]] +// __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) { - // CHECK-LABEL: test_mm256_permute2f128_ps - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_ps(a, b, 0x13); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_permute2f128_si256( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <4 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32> +// CHECK-NEXT: [[VPERM:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[VPERM]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) { - // CHECK-LABEL: test_mm256_permute2f128_si256 - // CHECK: shufflevector{{.*}} return _mm256_permute2f128_si256(a, b, 0x20); } -__m128 -test_mm_broadcast_ss(float const *__a) { - // CHECK-LABEL: test_mm_broadcast_ss - // CHECK: insertelement <4 x float> {{.*}}, i64 0 - // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> poison, <4 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <4 x float> @test_mm_broadcast_ss( +// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], 
float [[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x float> [[VECINIT3_I]], float [[TMP0]], i32 3 +// CHECK-NEXT: ret <4 x float> [[VECINIT4_I]] +// +__m128 test_mm_broadcast_ss(float const *__a) { return _mm_broadcast_ss(__a); } -__m256d -test_mm256_broadcast_sd(double const *__a) { - // CHECK-LABEL: test_mm256_broadcast_sd - // CHECK: insertelement <4 x double> {{.*}}, i64 0 - // CHECK: shufflevector <4 x double> {{.*}}, <4 x double> poison, <4 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_broadcast_sd( +// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x double> [[VECINIT_I]], double [[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x double> [[VECINIT2_I]], double [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <4 x double> [[VECINIT3_I]], double [[TMP0]], i32 3 +// CHECK-NEXT: ret <4 x double> [[VECINIT4_I]] +// +__m256d test_mm256_broadcast_sd(double const *__a) { return _mm256_broadcast_sd(__a); } -__m256 -test_mm256_broadcast_ss(float const *__a) { - // CHECK-LABEL: test_mm256_broadcast_ss - // CHECK: insertelement <8 x float> {{.*}}, i64 0 - // CHECK: shufflevector <8 x float> {{.*}}, <8 x float> poison, <8 x i32> zeroinitializer +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_broadcast_ss( +// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[__A]], align 1 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x float> poison, float [[TMP0]], i32 0 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float 
[[TMP0]], i32 1 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP0]], i32 2 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP0]], i32 3 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP0]], i32 4 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP0]], i32 5 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP0]], i32 6 +// CHECK-NEXT: [[VECINIT8_I:%.*]] = insertelement <8 x float> [[VECINIT7_I]], float [[TMP0]], i32 7 +// CHECK-NEXT: ret <8 x float> [[VECINIT8_I]] +// +__m256 test_mm256_broadcast_ss(float const *__a) { return _mm256_broadcast_ss(__a); } // Make sure we have the correct mask for each insertf128 case. +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_0( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[INSERT]] +// __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) { - // CHECK-LABEL: test_mm256_insertf128_ps_0 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_ps(a, b, 0); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_0( +// CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[INSERT]] +// __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) { - // CHECK-LABEL: test_mm256_insertf128_pd_0 - // CHECK: 
shufflevector{{.*}} return _mm256_insertf128_pd(a, b, 0); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_0( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32> +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) { - // CHECK-LABEL: test_mm256_insertf128_si256_0 - // X64: shufflevector{{.*}} - // X86: shufflevector{{.*}} return _mm256_insertf128_si256(a, b, 0); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_1( +// CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[INSERT]] +// __m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) { - // CHECK-LABEL: test_mm256_insertf128_ps_1 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_ps(a, b, 1); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_1( +// CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[INSERT]] +// __m256d 
test_mm256_insertf128_pd_1(__m256d a, __m128d b) { - // CHECK-LABEL: test_mm256_insertf128_pd_1 - // CHECK: shufflevector{{.*}} return _mm256_insertf128_pd(a, b, 1); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_1( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32> +// CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> +// CHECK-NEXT: [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64> +// CHECK-NEXT: ret <4 x i64> [[TMP2]] +// __m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) { - // CHECK-LABEL: test_mm256_insertf128_si256_1 - // X64: shufflevector{{.*}} - // X86: shufflevector{{.*}} return _mm256_insertf128_si256(a, b, 1); } // Make sure we have the correct mask for each extractf128 case. 
+// CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_0( +// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[EXTRACT]] +// __m128 test_mm256_extractf128_ps_0(__m256 a) { - // X64-LABEL: test_mm256_extractf128_ps_0 - // X64: shufflevector{{.*}} - // - // X86-LABEL: test_mm256_extractf128_ps_0 - // X86: shufflevector{{.*}} return _mm256_extractf128_ps(a, 0); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_0( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[EXTRACT]] +// __m128d test_mm256_extractf128_pd_0(__m256d a) { - // CHECK-LABEL: test_mm256_extractf128_pd_0 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_pd(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_0( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP1]] +// __m128i test_mm256_extractf128_si256_0(__m256i a) { - // CHECK-LABEL: test_mm256_extractf128_si256_0 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_si256(a, 0); } +// CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_1( +// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> +// CHECK-NEXT: ret <4 x float> [[EXTRACT]] +// __m128 
test_mm256_extractf128_ps_1(__m256 a) { - // X64-LABEL: test_mm256_extractf128_ps_1 - // X64: shufflevector{{.*}} - // - // X86-LABEL: test_mm256_extractf128_ps_1 - // X86: shufflevector{{.*}} return _mm256_extractf128_ps(a, 1); } +// CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_1( +// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> +// CHECK-NEXT: ret <2 x double> [[EXTRACT]] +// __m128d test_mm256_extractf128_pd_1(__m256d a) { - // CHECK-LABEL: test_mm256_extractf128_pd_1 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_pd(a, 1); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_1( +// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32> +// CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[TMP1]] +// __m128i test_mm256_extractf128_si256_1(__m256i a) { - // CHECK-LABEL: test_mm256_extractf128_si256_1 - // CHECK: shufflevector{{.*}} return _mm256_extractf128_si256(a, 1); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_set_m128( +// CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFFLE_I]] +// __m256 test_mm256_set_m128(__m128 hi, __m128 lo) { - // CHECK-LABEL: test_mm256_set_m128 - // CHECK: shufflevector{{.*}} return _mm256_set_m128(hi, lo); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_set_m128d( +// CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] { +// 
CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[SHUFFLE_I]] +// __m256d test_mm256_set_m128d(__m128d hi, __m128d lo) { - // CHECK-LABEL: test_mm256_set_m128d - // CHECK: shufflevector{{.*}} return _mm256_set_m128d(hi, lo); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_set_m128i( +// CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I]] +// __m256i test_mm256_set_m128i(__m128i hi, __m128i lo) { - // CHECK-LABEL: test_mm256_set_m128i - // CHECK: shufflevector{{.*}} return _mm256_set_m128i(hi, lo); } +// CHECK-LABEL: define dso_local <8 x float> @test_mm256_setr_m128( +// CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> +// CHECK-NEXT: ret <8 x float> [[SHUFFLE_I_I]] +// __m256 test_mm256_setr_m128(__m128 hi, __m128 lo) { - // CHECK-LABEL: test_mm256_setr_m128 - // CHECK: shufflevector{{.*}} return _mm256_setr_m128(lo, hi); } +// CHECK-LABEL: define dso_local <4 x double> @test_mm256_setr_m128d( +// CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x double> [[SHUFFLE_I_I]] +// __m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) { - // CHECK-LABEL: test_mm256_setr_m128d - // CHECK: shufflevector{{.*}} return _mm256_setr_m128d(lo, hi); } +// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_setr_m128i( +// CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) 
#[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> +// CHECK-NEXT: ret <4 x i64> [[SHUFFLE_I_I]] +// __m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) { - // CHECK-LABEL: test_mm256_setr_m128i - // CHECK: shufflevector{{.*}} return _mm256_setr_m128i(lo, hi); } diff --git a/clang/test/CodeGen/X86/sse.c b/clang/test/CodeGen/X86/sse.c index a75b8dc77e86e14..017bdd7846fa396 100644 --- a/clang/test/CodeGen/X86/sse.c +++ b/clang/test/CodeGen/X86/sse.c @@ -1,42 +1,72 @@ -// RUN: %clang_cc1 -ffreestanding -O3 -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s -// FIXME: This test currently depends on optimization - it should be rewritten to avoid it. +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -ffreestanding -triple x86_64-- -target-feature +sse4.1 -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s #include // Byte-shifts look reversed due to xmm register layout +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSLLDQ:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[CAST]], <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSLLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_slli_si128(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128 - // CHECK: shufflevector <16 x i8> <{{.*}}, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> {{.*}}, <16 x i32> return _mm_slli_si128(a, 5); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128_0( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// 
CHECK-NEXT: [[PSLLDQ:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[CAST]], <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSLLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_slli_si128_0(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128_0 - // CHECK-NOT: shufflevector return _mm_slli_si128(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128_16( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret <2 x i64> zeroinitializer +// __m128i test_mm_slli_si128_16(__m128i a) { - // CHECK-LABEL: @test_mm_slli_si128_16 - // CHECK-NOT: shufflevector return _mm_slli_si128(a, 16); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> zeroinitializer, <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_srli_si128(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128 - // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> , <16 x i32> return _mm_srli_si128(a, 5); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128_0( +// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8> +// CHECK-NEXT: [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> zeroinitializer, <16 x i32> +// CHECK-NEXT: [[CAST1:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64> +// CHECK-NEXT: ret <2 x i64> [[CAST1]] +// __m128i test_mm_srli_si128_0(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128_0 - // CHECK-NOT: shufflevector return _mm_srli_si128(a, 0); } +// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128_16( +// 
CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret <2 x i64> zeroinitializer +// __m128i test_mm_srli_si128_16(__m128i a) { - // CHECK-LABEL: @test_mm_srli_si128_16 - // CHECK-NOT: shufflevector return _mm_srli_si128(a, 16); } From 5903c6af44256e0bba77b7a69d608aa3ccc6a0dd Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Mon, 28 Oct 2024 15:35:17 -0700 Subject: [PATCH 234/425] InstCombine: Fold shufflevector(select) and shufflevector(phi) (#113746) - Transform `shufflevector(select(c, x, y), C)` to `select(c, shufflevector(x, C), shufflevector(y, C))` by re-using the `FoldOpIntoSelect` helper. - Transform `shufflevector(phi(x, y), C)` to `phi(shufflevector(x, C), shufflevector(y, C))` by re-using the `foldOpIntoPhi` helper. --- .../InstCombine/InstCombineVectorOps.cpp | 11 +++++ .../Transforms/InstCombine/vec_shuffle.ll | 44 +++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index d68ae64f08aa90a..75e7c1c97018cb1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2900,6 +2900,17 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (Instruction *I = foldIdentityPaddedShuffles(SVI)) return I; + if (match(RHS, m_Constant())) { + if (auto *SI = dyn_cast(LHS)) { + if (Instruction *I = FoldOpIntoSelect(SVI, SI)) + return I; + } + if (auto *PN = dyn_cast(LHS)) { + if (Instruction *I = foldOpIntoPhi(SVI, PN)) + return I; + } + } + if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) { Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder); return replaceInstUsesWith(SVI, V); diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index 75a84e51279b805..d050cf10849e3cc 100644 ---
a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -2377,3 +2377,47 @@ define <2 x i32> @not_splat_shuffle2(i32 %x) { %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <2 x i32> ret <2 x i32> %shuf } +define <2 x i32> @foldselect0(i1 %c) { +; CHECK-LABEL: @foldselect0( +; CHECK-NEXT: [[SHUF:%.*]] = select i1 [[C:%.*]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[SHUF]] +; + %sel = select i1 %c, <2 x i32> , <2 x i32> + %shuf = shufflevector <2 x i32> %sel, <2 x i32> poison, <2 x i32> + ret <2 x i32> %shuf +} + +declare i1 @cond() +declare <4 x i32> @value() + +define <4 x i32> @foldphi1() { +; CHECK-LABEL: @foldphi1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[V:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[XOR:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[VAL:%.*]] = call <4 x i32> @value() +; CHECK-NEXT: [[XOR]] = xor <4 x i32> [[V]], [[VAL]] +; CHECK-NEXT: [[C:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[SHUF1:%.*]] = shufflevector <4 x i32> [[XOR]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SHUF1]] +; +entry: + br label %loop + +loop: + %v = phi <4 x i32> [zeroinitializer, %entry], [%shuf1, %loop] + + %shuf0 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> + %val = call <4 x i32> @value() + %xor = xor <4 x i32> %shuf0, %val + %shuf1 = shufflevector <4 x i32> %xor, <4 x i32> poison, <4 x i32> + + %c = call i1 @cond() + br i1 %c, label %loop, label %exit + +exit: + ret <4 x i32> %shuf1 +} From 034cae42b130760587770b6a001b70c2a01bdfe9 Mon Sep 17 00:00:00 2001 From: Doug Wyatt Date: Mon, 28 Oct 2024 15:36:37 -0700 Subject: [PATCH 235/425] [clang] Add clang/docs/FunctionEffectAnalysis.rst. (#109855) Follow-on from #99656, which introduces 2nd pass caller/callee analysis for function effects. 
Wrote a new documentation page, derived directly from the RFC posted to LLVM Discourse earlier this year. --------- Co-authored-by: Doug Wyatt Co-authored-by: Sirraide --- clang/docs/FunctionEffectAnalysis.rst | 535 ++++++++++++++++++++++++++ clang/docs/index.rst | 1 + 2 files changed, 536 insertions(+) create mode 100644 clang/docs/FunctionEffectAnalysis.rst diff --git a/clang/docs/FunctionEffectAnalysis.rst b/clang/docs/FunctionEffectAnalysis.rst new file mode 100644 index 000000000000000..f286aec4f5b98fd --- /dev/null +++ b/clang/docs/FunctionEffectAnalysis.rst @@ -0,0 +1,535 @@ +======================== +Function Effect Analysis +======================== + +.. contents:: + :depth: 3 + :local: + + +Introduction +============ + +Clang Function Effect Analysis is a language extension which can warn about "unsafe" +constructs. The feature is currently tailored for the Performance Constraint attributes +``nonblocking`` and ``nonallocating``; functions with these attributes are verified as not +containing any language constructs or calls to other functions which violate the constraint. +(See :doc:`AttributeReference`.) + + +The ``nonblocking`` and ``nonallocating`` attributes +==================================================== + +Attribute syntax +---------------- + +The ``nonblocking`` and ``nonallocating`` attributes apply to function types, allowing them to be +attached to functions, blocks, function pointers, lambdas, and member functions. + +.. code-block:: c++ + + // Functions + void nonblockingFunction() [[clang::nonblocking]]; + void nonallocatingFunction() [[clang::nonallocating]]; + + // Function pointers + void (*nonblockingFunctionPtr)() [[clang::nonblocking]]; + + // Typedefs, type aliases. 
+ typedef void (*NBFunctionPtrTypedef)() [[clang::nonblocking]]; + using NBFunctionPtrTypeAlias_gnu = __attribute__((nonblocking)) void (*)(); + using NBFunctionPtrTypeAlias_std = void (*)() [[clang::nonblocking]]; + + // C++ methods + struct Struct { + void NBMethod() [[clang::nonblocking]]; + }; + + // C++ lambdas + auto nbLambda = []() [[clang::nonblocking]] {}; + + // Blocks + void (^nbBlock)() = ^() [[clang::nonblocking]] {}; + +The attribute applies only to the function itself. In particular, it does not apply to any nested +functions or declarations, such as blocks, lambdas, and local classes. + +This document uses the C++/C23 syntax ``[[clang::nonblocking]]``, since it parallels the placement +of the ``noexcept`` specifier, and the attributes have other similarities to ``noexcept``. The GNU +``__attribute__((nonblocking))`` syntax is also supported. Note that it requires a different +placement on a C++ type alias. + +Like ``noexcept``, ``nonblocking`` and ``nonallocating`` have an optional argument, a compile-time +constant boolean expression. By default, the argument is ``true``, so ``[[clang::nonblocking]]`` +is equivalent to ``[[clang::nonblocking(true)]]``, and declares the function type as never blocking. + + +Attribute semantics +------------------- + +Together with ``noexcept``, the ``nonallocating`` and ``nonblocking`` attributes define an ordered +series of performance constraints. From weakest to strongest: + +- ``noexcept`` (as per the C++ standard): The function type will never throw an exception. +- ``nonallocating``: The function type will never allocate memory on the heap or throw an + exception. +- ``nonblocking``: The function type will never block on a lock, allocate memory on the heap, + or throw an exception. + +``nonblocking`` includes the ``nonallocating`` guarantee. + +While ``nonblocking`` and ``nonallocating`` are conceptually a superset of ``noexcept``, neither +attribute implicitly specifies ``noexcept``. 
Further, ``noexcept`` has a specified runtime behavior of +aborting if an exception is thrown, while the ``nonallocating`` and ``nonblocking`` attributes are +mainly for compile-time analysis and have no runtime behavior, except in code built +with Clang's :doc:`RealtimeSanitizer`. Nonetheless, Clang emits a +warning if, in C++, a function is declared ``nonblocking`` or ``nonallocating`` without +``noexcept``. This diagnostic is controlled by ``-Wperf-constraint-implies-noexcept``. + +``nonblocking(true)`` and ``nonallocating(true)`` apply to function *types*, and by extension, to +function-like declarations. When applied to a declaration with a body, the compiler verifies the +function, as described in the section "Analysis and warnings", below. + +``blocking`` and ``allocating`` are synonyms for ``nonblocking(false)`` and +``nonallocating(false)``, respectively. They can be used on a function-like declaration to +explicitly disable any potential inference of ``nonblocking`` or ``nonallocating`` during +verification. (Inference is described later in this document). ``nonblocking(false)`` and +``nonallocating(false)`` are legal, but superfluous when applied to a function *type* +that is not part of a declarator: ``float (int) [[nonblocking(false)]]`` and +``float (int)`` are identical types. + +For functions with no explicit performance constraint, the worst is assumed: the function +allocates memory and potentially blocks, unless it can be inferred otherwise. This is detailed in the +discussion of verification. + +The following example describes the meanings of all permutations of the two attributes and arguments: + +.. code-block:: c++ + + void nb1_na1() [[clang::nonblocking(true)]] [[clang::nonallocating(true)]]; + // Valid; nonallocating(true) is superfluous but doesn't contradict the guarantee. 
+ + void nb1_na0() [[clang::nonblocking(true)]] [[clang::nonallocating(false)]]; + // error: 'allocating' and 'nonblocking' attributes are not compatible + + void nb0_na1() [[clang::nonblocking(false)]] [[clang::nonallocating(true)]]; + // Valid; the function does not allocate memory, but may lock for other reasons. + + void nb0_na0() [[clang::nonblocking(false)]] [[clang::nonallocating(false)]]; + // Valid. + + +Type conversions +---------------- + +A performance constraint can be removed or weakened via an implicit conversion. An attempt to add +or strengthen a performance constraint is unsafe and results in a warning. The rules for this +are comparable to that for ``noexcept`` in C++17 and later. + +.. code-block:: c++ + + void unannotated(); + void nonblocking() [[clang::nonblocking]]; + void nonallocating() [[clang::nonallocating]]; + + void example() + { + // It's fine to remove a performance constraint. + void (*fp_plain)(); + fp_plain = unannotated; + fp_plain = nonblocking; + fp_plain = nonallocating; + + // Adding/spoofing nonblocking is unsafe. + void (*fp_nonblocking)() [[clang::nonblocking]]; + fp_nonblocking = nullptr; + fp_nonblocking = nonblocking; + fp_nonblocking = unannotated; + // ^ warning: attribute 'nonblocking' should not be added via type conversion + fp_nonblocking = nonallocating; + // ^ warning: attribute 'nonblocking' should not be added via type conversion + + // Adding/spoofing nonallocating is unsafe. + void (*fp_nonallocating)() [[clang::nonallocating]]; + fp_nonallocating = nullptr; + fp_nonallocating = nonallocating; + fp_nonallocating = nonblocking; // no warning because nonblocking includes nonallocating + fp_nonallocating = unannotated; + // ^ warning: attribute 'nonallocating' should not be added via type conversion + } + +Virtual methods +--------------- + +In C++, when a virtual method has a performance constraint, overriding methods in +subclasses inherit the constraint. + +.. 
code-block:: c++ + + struct Base { + virtual void unsafe(); + virtual void safe() noexcept [[clang::nonblocking]]; + }; + + struct Derived : public Base { + void unsafe() [[clang::nonblocking]] override; + // It's okay for an overridden method to be more constrained + + void safe() noexcept override; + // This method is implicitly declared `nonblocking`, inherited from Base. + }; + +Redeclarations, overloads, and name mangling +-------------------------------------------- + +The ``nonblocking`` and ``nonallocating`` attributes, like ``noexcept``, do not factor into +argument-dependent lookup and overloaded functions/methods. + +First, consider that ``noexcept`` is integral to a function's type: + +.. code-block:: c++ + + void f1(int); + void f1(int) noexcept; + // error: exception specification in declaration does not match previous + // declaration + +Unlike ``noexcept``, a redeclaration of ``f2`` with an added or stronger performance constraint is +legal and propagates the attribute to the previous declaration: + +.. code-block:: c++ + + int f2(); + int f2() [[clang::nonblocking]]; // redeclaration with stronger constraint is OK. + +This greatly eases adoption by making it possible to annotate functions in external libraries +without modifying library headers. + +A redeclaration with a removed or weaker performance constraint produces a warning, paralleling +the behavior of ``noexcept``: + +.. code-block:: c++ + + int f2() { return 42; } + // warning: attribute 'nonblocking' on function does not match previous declaration + +In C++14, the following two declarations of `f3` are identical (a single function). In C++17 they +are separate overloads: + +.. code-block:: c++ + + void f3(void (*)()); + void f3(void (*)() noexcept); + +Similarly, the following two declarations of `f4` are separate overloads. This pattern may pose +difficulties due to ambiguity: + +.. 
code-block:: c++ + + void f4(void (*)()); + void f4(void (*)() [[clang::nonblocking]]); + +The attributes have no effect on the mangling of function and method names. + +Objective-C +----------- + +The attributes are currently unsupported on Objective-C methods. + +Analysis and warnings +===================== + +Constraints +----------- + +Functions declared ``nonallocating`` or ``nonblocking``, when defined, are verified according to the +following rules. Such functions: + +1. May not allocate or deallocate memory on the heap. The analysis follows the calls to + ``operator new`` and ``operator delete`` generated by the ``new`` and ``delete`` keywords, and + treats them like any other function call. The global ``operator new`` and ``operator delete`` + aren't declared ``nonblocking`` or ``nonallocating`` and so they are considered unsafe. (This + is correct because most memory allocators are not lock-free. Note that the placement form of + ``operator new`` is implemented inline in libc++'s ``<new>`` header, and is verifiably + ``nonblocking``, since it merely casts the supplied pointer to the result type.) + +2. May not throw or catch exceptions. To throw, the compiler must allocate the exception on the + heap. (Also, many subclasses of ``std::exception`` allocate a string). Exceptions are + deallocated when caught. + +3. May not make any indirect function call, via a virtual method, function pointer, or + pointer-to-member function, unless the target is explicitly declared with the same + ``nonblocking`` or ``nonallocating`` attribute (or stronger). + +4. May not make direct calls to any other function, with the following exceptions: + + a. The callee is also explicitly declared with the same ``nonblocking`` or ``nonallocating`` + attribute (or stronger). + b. 
The callee is defined in the same translation unit as the caller, does not have the ``false`` + form of the required attribute, and can be verified to have the same attribute or stronger, + according to these same rules. + c. The callee is a built-in function that is known not to block or allocate. + d. The callee is declared ``noreturn`` and, if compiling C++, the callee is also declared + ``noexcept``. This special case excludes functions such as ``abort()`` and ``std::terminate()`` + from the analysis. (The reason for requiring ``noexcept`` in C++ is that a function declared + ``noreturn`` could be a wrapper for ``throw``.) + +5. May not invoke or access an Objective-C method or property, since ``objc_msgSend()`` calls into + the Objective-C runtime, which may allocate memory or otherwise block. + +6. May not access thread-local variables. Typically, thread-local variables are allocated on the + heap when first accessed. + +Functions declared ``nonblocking`` have an additional constraint: + +7. May not declare static local variables (e.g. Meyers singletons). The compiler generates a lock + protecting the initialization of the variable. + +Violations of any of these rules result in warnings, in the ``-Wfunction-effects`` category: + +.. 
code-block:: c++ + + void notInline(); + + void example() [[clang::nonblocking]] + { + auto* x = new int; + // warning: function with 'nonblocking' attribute must not allocate or deallocate + // memory + + if (x == nullptr) { + static Logger* logger = createLogger(); + // warning: function with 'nonblocking' attribute must not have static local variables + + throw std::runtime_error{ "null" }; + // warning: 'nonblocking' function 'example' must not throw exceptions + } + notInline(); + // warning: function with 'nonblocking' attribute must not call non-'nonblocking' function + // 'notInline' + // note (on notInline()): declaration cannot be inferred 'nonblocking' because it has no + // definition in this translation unit + } + +Inferring ``nonblocking`` or ``nonallocating`` +---------------------------------------------- + +In the absence of a ``nonblocking`` or ``nonallocating`` attribute (whether ``true`` or ``false``), +a function that is called from a performance-constrained function may be analyzed to +infer whether it has a desired attribute. This analysis happens when the function is not a virtual +method, and it has a visible definition within the current translation unit (i.e. its body can be +traversed). + +.. 
code-block:: c++ + + void notInline(); + int implicitlySafe() { return 42; } + void implicitlyUnsafe() { notInline(); } + + void example() [[clang::nonblocking]] + { + int x = implicitlySafe(); // OK + implicitlyUnsafe(); + // warning: function with 'nonblocking' attribute must not call non-'nonblocking' function + // 'implicitlyUnsafe' + // note (on implicitlyUnsafe): function cannot be inferred 'nonblocking' because it calls + // non-'nonblocking' function 'notInline' + // note (on notInline()): declaration cannot be inferred 'nonblocking' because it has no + // definition in this translation unit + } + +Lambdas and blocks +------------------ + +As mentioned earlier, the performance constraint attributes apply only to a single function and not +to any code nested inside it, including blocks, lambdas, and local classes. It is possible for a +nonblocking function to schedule the execution of a blocking lambda on another thread. Similarly, a +blocking function may create a ``nonblocking`` lambda for use in a realtime context. + +Operations which create, destroy, copy, and move lambdas and blocks are analyzed in terms of the +underlying function calls. For example, the creation of a lambda with captures generates a function +call to an anonymous struct's constructor, passing the captures as parameters. + +Implicit function calls in the AST +---------------------------------- + +The ``nonblocking`` / ``nonallocating`` analysis occurs at the Sema phase of analysis in Clang. +During Sema, there are some constructs which will eventually become function calls, but do not +appear as function calls in the AST. For example, ``auto* foo = new Foo;`` becomes a declaration +containing a ``CXXNewExpr`` which is understood as a function call to the global ``operator new`` +(in this example), and a ``CXXConstructExpr``, which, for analysis purposes, is a function call to +``Foo``'s constructor. 
Most gaps in the analysis would be due to incomplete knowledge of AST +constructs which become function calls. + +Disabling diagnostics +--------------------- + +Function effect diagnostics are controlled by ``-Wfunction-effects``. + +A construct like this can be used to exempt code from the checks described here: + +.. code-block:: c++ + + #define NONBLOCKING_UNSAFE(...) \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wunknown-warning-option\"") \ + _Pragma("clang diagnostic ignored \"-Wfunction-effects\"") \ + __VA_ARGS__ \ + _Pragma("clang diagnostic pop") + +Disabling the diagnostic allows for: + +- constructs which do block, but which in practice are used in ways to avoid unbounded blocking, + e.g. a thread pool with semaphores to coordinate multiple realtime threads; +- using libraries which are safe but not yet annotated; +- incremental adoption in a large codebase. + +Adoption +======== + +There are a few common issues that arise when adopting the ``nonblocking`` and ``nonallocating`` +attributes. + +C++ exceptions +-------------- + +Exceptions pose a challenge to the adoption of the performance constraints. Common library functions +which throw exceptions include: + ++----------------------------------+-----------------------------------------------------------------------+ +| Method | Alternative | ++==================================+=======================================================================+ +| ``std::vector::at()`` | ``operator[](size_t)``, after verifying that the index is in range. | ++----------------------------------+-----------------------------------------------------------------------+ +| ``std::optional::value()`` | ``operator*``, after checking ``has_value()`` or ``operator bool()``. | ++----------------------------------+-----------------------------------------------------------------------+ +| ``std::expected::value()`` | Same as for ``std::optional::value()``. 
| ++----------------------------------+-----------------------------------------------------------------------+ + + +``std::function<R(Args...)>`` +----------------------------- + +``std::function<R(Args...)>`` is generally incompatible with ``nonblocking`` and ``nonallocating`` +code, because a typical implementation may allocate heap memory in the constructor. + +Alternatives: + +- ``std::function_ref`` (available in C++26 or as ``llvm::function_ref``). This is appropriate and + optimal when a functor's lifetime does not need to extend past the function that created it. + +- ``inplace_function`` from SG14. This solves the allocation problem by giving the functor wrapper + a fixed size known at compile time and using an inline buffer. + +While these alternatives both address the heap allocation of ``std::function``, they are still +obstacles to ``nonblocking/nonallocating`` verification, for reasons detailed in the next section. + + +Interactions with type-erasure techniques +----------------------------------------- + +``std::function<R(Args...)>`` illustrates a common C++ type-erasure technique. Using template +argument deduction, it decomposes a function type into its return and parameter types. Additional +components of the function type, including ``noexcept``, ``nonblocking``, ``nonallocating``, and any +other attributes, are discarded. + +Standard library support for these components of a function type is not immediately forthcoming. + +Code can work around this limitation in either of two ways: + +1. Avoid abstractions like ``std::function`` and instead work directly with the original lambda type. + +2. Create a specialized alternative, e.g. ``nonblocking_function_ref`` where all function + pointers used in the implementation and its interface are ``nonblocking``. + +As an example of the first approach, when using a lambda as a *Callable* template parameter, the +attribute is preserved: + +.. 
code-block:: c++ + + std::sort(vec.begin(), vec.end(), + [](const Elem& a, const Elem& b) [[clang::nonblocking]] { return a.mem < b.mem; }); + +Here, the type of the ``Compare`` template parameter is an anonymous class generated from the +lambda, with an ``operator()`` method holding the ``nonblocking`` attribute. + +A complication arises when a *Callable* template parameter, instead of being a lambda or class +implementing ``operator()``, is a function pointer: + +.. code-block:: c++ + + static bool compare_elems(const Elem& a, const Elem& b) [[clang::nonblocking]] { + return a.mem < b.mem; }; + + std::sort(vec.begin(), vec.end(), compare_elems); + +Here, the type of ``compare_elems`` is decomposed to ``bool(const Elem&, const Elem&)``, without +``nonblocking``, when forming the template parameter. This can be solved using the second approach, +creating a specialized alternative which explicitly requires the attribute. In this case, it's +possible to use a small wrapper to transform the function pointer into a functor: + +.. code-block:: c++ + + template <typename> + class nonblocking_fp; + + template <typename R, typename... Args> + class nonblocking_fp<R(Args...)> { + public: + using impl_t = R (*)(Args...) [[clang::nonblocking]]; + + private: + impl_t mImpl{ nullptr }; + public: + nonblocking_fp() = default; + nonblocking_fp(impl_t f) : mImpl{ f } {} + + R operator()(Args... args) const + { + return mImpl(std::forward<Args>(args)...); + } + }; + + // deduction guide (like std::function's) + template< class R, class... ArgTypes > + nonblocking_fp( R(*)(ArgTypes...) ) -> nonblocking_fp<R(ArgTypes...)>; + + // -- + + // Wrap the function pointer in a functor which preserves ``nonblocking``. + std::sort(vec.begin(), vec.end(), nonblocking_fp{ compare_elems }); + +Now, the ``nonblocking`` attribute of ``compare_elems`` is verified when it is converted to a +``nonblocking`` function pointer, as the argument to ``nonblocking_fp``'s constructor. The template +parameter is the functor class ``nonblocking_fp``. 
+ + +Static local variables +---------------------- + +Static local variables are often used for lazily-constructed globals (Meyers singletons). Beyond the +compiler's use of a lock to ensure thread-safe initialization, it is dangerously easy to +inadvertently trigger initialization, involving heap allocation, from a ``nonblocking`` or +``nonallocating`` context. + +Generally, such singletons need to be replaced by globals, and care must be taken to ensure their +initialization before they are used from ``nonblocking`` or ``nonallocating`` contexts. + + +Annotating libraries +-------------------- + +It can be surprising that the analysis does not depend on knowledge of any primitives; it simply +assumes the worst, that all function calls are unsafe unless explicitly marked as safe or able to be +inferred as safe. With ``nonblocking``, this appears to suffice for all but the most primitive of +spinlocks. + +At least for an operating system's C functions, it is possible to define an override header which +redeclares safe common functions (e.g. ``pthread_self()``) with the addition of ``nonblocking``. +This may help in adopting the feature incrementally. + +It also helps that many of the functions in the standard C libraries (notably ``<math.h>``) +are treated as built-in functions by Clang, which the analysis understands to be safe. + +Much of the C++ standard library consists of inline templated functions which work well with +inference. A small number of primitives may need explicit ``nonblocking/nonallocating`` attributes. 
diff --git a/clang/docs/index.rst b/clang/docs/index.rst index 0f6fb36c4d3352b..1096432813fac52 100644 --- a/clang/docs/index.rst +++ b/clang/docs/index.rst @@ -27,6 +27,7 @@ Using Clang as a Compiler ThreadSafetyAnalysis SafeBuffers DataFlowAnalysisIntro + FunctionEffectAnalysis AddressSanitizer ThreadSanitizer MemorySanitizer From 902acde34198bb11cc758dcf3aee00eb1cb09ceb Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Mon, 28 Oct 2024 16:08:28 +0000 Subject: [PATCH 236/425] [InstCombine] Optimize away certain additions using modular arithmetic We can turn: ``` %add = add i8 %arg, C1 %and = and i8 %add, C2 %cmp = icmp eq i1 %and, C3 ``` into: ``` %and = and i8 %arg, C2 %cmp = icmp eq i1 %and, (C3 - C1) & C2 ``` This is only worth doing if the sequence is the sole user of the addition operation. --- .../InstCombine/InstCombineCompares.cpp | 16 ++++ .../Transforms/InstCombine/and-compare.ll | 84 +++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 338e9772c7cc088..6bb39cabb0988b9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1976,6 +1976,22 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp, return new ICmpInst(Pred, LShr, Constant::getNullValue(LShr->getType())); } + // (icmp eq/ne (and (add A, Addend), Msk), C) + // -> (icmp eq/ne (and A, Msk), (and (sub C, Addend), Msk)) + { + Value *A; + const APInt *Addend, *Msk; + if (match(And, m_And(m_OneUse(m_Add(m_Value(A), m_APInt(Addend))), + m_APInt(Msk))) && + Msk->isMask() && C.ule(*Msk)) { + APInt NewComperand = (C - *Addend) & *Msk; + Value* MaskA = Builder.CreateAnd(A, ConstantInt::get(A->getType(), *Msk)); + return new ICmpInst( + Pred, MaskA, + Constant::getIntegerValue(MaskA->getType(), NewComperand)); + } + } + return nullptr; } diff --git 
a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll index 5a9767a64a2ced0..9f8d3e317accc73 100644 --- a/llvm/test/Transforms/InstCombine/and-compare.ll +++ b/llvm/test/Transforms/InstCombine/and-compare.ll @@ -172,3 +172,87 @@ define i1 @test_ne_cp2_other_okay2(i8 %x, i8 %yy) { %r = icmp ne i8 %and_x_y, %and_x_neg_y ret i1 %r } + +define i1 @test_eq_0_and_15_add_1(i8 %a) { +; CHECK-LABEL: @test_eq_0_and_15_add_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 15 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %add = add i8 %a, 1 + %and = and i8 %add, 15 + %cmp = icmp eq i8 %and, 0 + ret i1 %cmp +} + +define i1 @test_ne_0_and_15_add_1(i8 %a) { +; CHECK-LABEL: @test_ne_0_and_15_add_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[TMP0]], 15 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %add = add i8 %a, 1 + %and = and i8 %add, 15 + %cmp = icmp ne i8 %and, 0 + ret i1 %cmp +} + +define i1 @test_eq_0_and_15_add_3(i8 %a) { +; CHECK-LABEL: @test_eq_0_and_15_add_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 13 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %add = add i8 %a, 3 + %and = and i8 %add, 15 + %cmp = icmp eq i8 %and, 0 + ret i1 %cmp +} + +define i1 @test_ne_0_and_15_add_3(i8 %a) { +; CHECK-LABEL: @test_ne_0_and_15_add_3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[TMP0]], 13 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %add = add i8 %a, 3 + %and = and i8 %add, 15 + %cmp = icmp ne i8 %and, 0 + ret i1 %cmp +} + +define i1 @test_eq_11_and_15_add_10(i8 %a) { +; CHECK-LABEL: @test_eq_11_and_15_add_10( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 1 +; CHECK-NEXT: ret i1 
[[CMP]] +; +entry: + %add = add i8 %a, 10 + %and = and i8 %add, 15 + %cmp = icmp eq i8 %and, 11 + ret i1 %cmp +} + +define i1 @test_ne_11_and_15_add_10(i8 %a) { +; CHECK-LABEL: @test_ne_11_and_15_add_10( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[TMP0]], 1 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %add = add i8 %a, 10 + %and = and i8 %add, 15 + %cmp = icmp ne i8 %and, 11 + ret i1 %cmp +} From 757d0e4764fffcd4e60338147c5f5456e2534395 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Tue, 29 Oct 2024 06:13:32 +0700 Subject: [PATCH 237/425] Revert "[CFI][LowerTypeTests] Fix indirect call with alias" (#113978) Reverts llvm/llvm-project#106185 This is breaking Sanitizer bots: https://lab.llvm.org/buildbot/#/builders/66/builds/5449/steps/8/logs/stdio --- llvm/include/llvm/IR/ModuleSummaryIndexYAML.h | 102 +++++------------- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 6 +- .../LowerTypeTests/cfi-icall-alias.ll | 54 ---------- 3 files changed, 29 insertions(+), 133 deletions(-) delete mode 100644 llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h index d12bc260f5cf4e2..902d1305c818acf 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -135,14 +135,10 @@ template <> struct MappingTraits { } }; -struct GlobalValueSummaryYaml { - // Commonly used fields +struct FunctionSummaryYaml { unsigned Linkage, Visibility; bool NotEligibleToImport, Live, IsLocal, CanAutoHide; unsigned ImportType; - // Fields for AliasSummary - std::optional Aliasee; - // Fields for FunctionSummary std::vector Refs; std::vector TypeTests; std::vector TypeTestAssumeVCalls, @@ -180,8 +176,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummary::ConstVCall) namespace llvm { namespace yaml { -template <> struct MappingTraits { - static void mapping(IO &io, 
GlobalValueSummaryYaml &summary) { +template <> struct MappingTraits { + static void mapping(IO &io, FunctionSummaryYaml& summary) { io.mapOptional("Linkage", summary.Linkage); io.mapOptional("Visibility", summary.Visibility); io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport); @@ -189,7 +185,6 @@ template <> struct MappingTraits { io.mapOptional("Local", summary.IsLocal); io.mapOptional("CanAutoHide", summary.CanAutoHide); io.mapOptional("ImportType", summary.ImportType); - io.mapOptional("Aliasee", summary.Aliasee); io.mapOptional("Refs", summary.Refs); io.mapOptional("TypeTests", summary.TypeTests); io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls); @@ -204,7 +199,7 @@ template <> struct MappingTraits { } // End yaml namespace } // End llvm namespace -LLVM_YAML_IS_SEQUENCE_VECTOR(GlobalValueSummaryYaml) +LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionSummaryYaml) namespace llvm { namespace yaml { @@ -212,99 +207,61 @@ namespace yaml { // FIXME: Add YAML mappings for the rest of the module summary. template <> struct CustomMappingTraits { static void inputOne(IO &io, StringRef Key, GlobalValueSummaryMapTy &V) { - std::vector GVSums; - io.mapRequired(Key.str().c_str(), GVSums); + std::vector FSums; + io.mapRequired(Key.str().c_str(), FSums); uint64_t KeyInt; if (Key.getAsInteger(0, KeyInt)) { io.setError("key not an integer"); return; } auto &Elem = V.try_emplace(KeyInt, /*IsAnalysis=*/false).first->second; - for (auto &GVSum : GVSums) { - GlobalValueSummary::GVFlags GVFlags( - static_cast(GVSum.Linkage), - static_cast(GVSum.Visibility), - GVSum.NotEligibleToImport, GVSum.Live, GVSum.IsLocal, - GVSum.CanAutoHide, - static_cast(GVSum.ImportType)); - if (GVSum.Aliasee) { - auto ASum = std::make_unique(GVFlags); - if (!V.count(*GVSum.Aliasee)) - V.emplace(*GVSum.Aliasee, /*IsAnalysis=*/false); - ValueInfo AliaseeVI(/*IsAnalysis=*/false, &*V.find(*GVSum.Aliasee)); - // Note: Aliasee cannot be filled until all summaries are loaded. 
- // This is done in fixAliaseeLinks() which is called in - // MappingTraits::mapping(). - ASum->setAliasee(AliaseeVI, /*Aliasee=*/nullptr); - Elem.SummaryList.push_back(std::move(ASum)); - continue; - } + for (auto &FSum : FSums) { SmallVector Refs; - Refs.reserve(GVSum.Refs.size()); - for (auto &RefGUID : GVSum.Refs) { + Refs.reserve(FSum.Refs.size()); + for (auto &RefGUID : FSum.Refs) { auto It = V.try_emplace(RefGUID, /*IsAnalysis=*/false).first; Refs.push_back(ValueInfo(/*IsAnalysis=*/false, &*It)); } Elem.SummaryList.push_back(std::make_unique( - GVFlags, /*NumInsts=*/0, FunctionSummary::FFlags{}, std::move(Refs), - SmallVector{}, std::move(GVSum.TypeTests), - std::move(GVSum.TypeTestAssumeVCalls), - std::move(GVSum.TypeCheckedLoadVCalls), - std::move(GVSum.TypeTestAssumeConstVCalls), - std::move(GVSum.TypeCheckedLoadConstVCalls), + GlobalValueSummary::GVFlags( + static_cast(FSum.Linkage), + static_cast(FSum.Visibility), + FSum.NotEligibleToImport, FSum.Live, FSum.IsLocal, + FSum.CanAutoHide, + static_cast(FSum.ImportType)), + /*NumInsts=*/0, FunctionSummary::FFlags{}, std::move(Refs), + SmallVector{}, std::move(FSum.TypeTests), + std::move(FSum.TypeTestAssumeVCalls), + std::move(FSum.TypeCheckedLoadVCalls), + std::move(FSum.TypeTestAssumeConstVCalls), + std::move(FSum.TypeCheckedLoadConstVCalls), ArrayRef{}, ArrayRef{}, ArrayRef{})); } } static void output(IO &io, GlobalValueSummaryMapTy &V) { for (auto &P : V) { - std::vector GVSums; + std::vector FSums; for (auto &Sum : P.second.SummaryList) { if (auto *FSum = dyn_cast(Sum.get())) { std::vector Refs; Refs.reserve(FSum->refs().size()); for (auto &VI : FSum->refs()) Refs.push_back(VI.getGUID()); - GVSums.push_back(GlobalValueSummaryYaml{ + FSums.push_back(FunctionSummaryYaml{ FSum->flags().Linkage, FSum->flags().Visibility, static_cast(FSum->flags().NotEligibleToImport), static_cast(FSum->flags().Live), static_cast(FSum->flags().DSOLocal), static_cast(FSum->flags().CanAutoHide), - FSum->flags().ImportType, 
/*Aliasee=*/std::nullopt, Refs, - FSum->type_tests(), FSum->type_test_assume_vcalls(), - FSum->type_checked_load_vcalls(), + FSum->flags().ImportType, Refs, FSum->type_tests(), + FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(), FSum->type_test_assume_const_vcalls(), FSum->type_checked_load_const_vcalls()}); - } else if (auto *ASum = dyn_cast(Sum.get()); - ASum && ASum->hasAliasee()) { - GVSums.push_back(GlobalValueSummaryYaml{ - ASum->flags().Linkage, ASum->flags().Visibility, - static_cast(ASum->flags().NotEligibleToImport), - static_cast(ASum->flags().Live), - static_cast(ASum->flags().DSOLocal), - static_cast(ASum->flags().CanAutoHide), - ASum->flags().ImportType, - /*Aliasee=*/ASum->getAliaseeGUID()}); - } - } - if (!GVSums.empty()) - io.mapRequired(llvm::utostr(P.first).c_str(), GVSums); - } - } - static void fixAliaseeLinks(GlobalValueSummaryMapTy &V) { - for (auto &P : V) { - for (auto &Sum : P.second.SummaryList) { - if (auto *Alias = dyn_cast(Sum.get())) { - ValueInfo AliaseeVI = Alias->getAliaseeVI(); - auto AliaseeSL = AliaseeVI.getSummaryList(); - if (AliaseeSL.empty()) { - ValueInfo EmptyVI; - Alias->setAliasee(EmptyVI, nullptr); - } else - Alias->setAliasee(AliaseeVI, AliaseeSL[0].get()); - } + } } + if (!FSums.empty()) + io.mapRequired(llvm::utostr(P.first).c_str(), FSums); } } }; @@ -324,9 +281,6 @@ template <> struct CustomMappingTraits { template <> struct MappingTraits { static void mapping(IO &io, ModuleSummaryIndex& index) { io.mapOptional("GlobalValueMap", index.GlobalValueMap); - if (!io.outputting()) - CustomMappingTraits::fixAliaseeLinks( - index.GlobalValueMap); io.mapOptional("TypeIdMap", index.TypeIdMap); io.mapOptional("WithGlobalValueDeadStripping", index.WithGlobalValueDeadStripping); diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 6ba371069bb2302..3fcfc6a876776d9 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ 
b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -2083,12 +2083,8 @@ bool LowerTypeTestsModule::lower() { for (auto &I : *ExportSummary) for (auto &GVS : I.second.SummaryList) if (GVS->isLive()) - for (const auto &Ref : GVS->refs()) { + for (const auto &Ref : GVS->refs()) AddressTaken.insert(Ref.getGUID()); - for (auto &RefGVS : Ref.getSummaryList()) - if (auto Alias = dyn_cast(RefGVS.get())) - AddressTaken.insert(Alias->getAliaseeGUID()); - } NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"); if (CfiFunctionsMD) { diff --git a/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll b/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll deleted file mode 100644 index 0c5324ee96c9391..000000000000000 --- a/llvm/test/Transforms/LowerTypeTests/cfi-icall-alias.ll +++ /dev/null @@ -1,54 +0,0 @@ -;; Check that if the address of a weak function is only taken through an alias, -;; it is still added to a list of exported functions and @llvm.type.test() is -;; lowered to an actual check against the generated CFI jumptable. - -RUN: rm -rf %t.dir && split-file %s %t.dir && cd %t.dir -RUN: opt test.ll --thinlto-bc --thinlto-split-lto-unit -o test.bc -RUN: llvm-modextract test.bc -n 0 -o test0.bc -RUN: llvm-modextract test.bc -n 1 -o test1.bc - -;; Check that a CFI jumptable is generated. -RUN: opt test1.bc -passes=lowertypetests -lowertypetests-read-summary=in.yaml \ -RUN: -lowertypetests-summary-action=export -lowertypetests-write-summary=exported.yaml \ -RUN: -S -o - | FileCheck %s --check-prefix=REGULAR -REGULAR: @__typeid__ZTSFvvE_global_addr = hidden alias i8, ptr @.cfi.jumptable -REGULAR: @f = alias void (), ptr @.cfi.jumptable -REGULAR: define private void @.cfi.jumptable() - -;; CHECK that @llvm.type.test() is lowered to an actual check. 
-RUN: opt test0.bc -passes=lowertypetests -lowertypetests-read-summary=exported.yaml \ -RUN: -lowertypetests-summary-action=import -S -o - | FileCheck %s --check-prefix=THIN -THIN: define i1 @test() { -THIN-NEXT: %1 = icmp eq i64 ptrtoint (ptr @alias to i64), ptrtoint (ptr @__typeid__ZTSFvvE_global_addr to i64) -THIN-NEXT: ret i1 %1 -THIN-NEXT: } - -;--- test.ll -target triple = "x86_64-pc-linux-gnu" - -@alias = alias void(), ptr @f - -define weak void @f() !type !0 { - ret void -} - -define i1 @test() { - %1 = call i1 @llvm.type.test(ptr nonnull @alias, metadata !"_ZTSFvvE") - ret i1 %1 -} - -declare i1 @llvm.type.test(ptr, metadata) - -!0 = !{i64 0, !"_ZTSFvvE"} -;--- in.yaml ---- -GlobalValueMap: - 8346051122425466633: # guid("test") - - Live: true - Refs: [5833419078793185394] # guid("alias") - TypeTests: [9080559750644022485] # guid("_ZTSFvvE") - 5833419078793185394: # guid("alias") - - Aliasee: 14740650423002898831 # guid("f") - 14740650423002898831: # guid("f") - - -... From 8e6856e27859c90c5337a8328848b0959fe409fe Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 28 Oct 2024 16:15:39 -0700 Subject: [PATCH 238/425] [Clang][TableGen] Use StringRef::str() instead of std::string() cast (#113645) Use `StringRef::str()` instead of std::string(StringRef) to cast from StringRef to std::string. 
--- clang/utils/TableGen/ClangAttrEmitter.cpp | 65 +++++++++---------- .../ClangCommentCommandInfoEmitter.cpp | 2 +- ...mentHTMLNamedCharacterReferenceEmitter.cpp | 2 +- .../TableGen/ClangCommentHTMLTagsEmitter.cpp | 4 +- 4 files changed, 33 insertions(+), 40 deletions(-) diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp index cf9c70a93e5db22..3031d81b3df7312 100644 --- a/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -122,9 +122,7 @@ GetFlattenedSpellings(const Record &Attr) { static std::string ReadPCHRecord(StringRef type) { return StringSwitch(type) - .EndsWith("Decl *", "Record.readDeclAs<" + - std::string(type.data(), 0, type.size() - 1) + - ">()") + .EndsWith("Decl *", "Record.readDeclAs<" + type.drop_back().str() + ">()") .Case("TypeSourceInfo *", "Record.readTypeSourceInfo()") .Case("Expr *", "Record.readExpr()") .Case("IdentifierInfo *", "Record.readIdentifier()") @@ -145,18 +143,16 @@ static StringRef getStorageType(StringRef type) { static std::string WritePCHRecord(StringRef type, StringRef name) { return "Record." 
+ StringSwitch(type) - .EndsWith("Decl *", "AddDeclRef(" + std::string(name) + ");\n") + .EndsWith("Decl *", "AddDeclRef(" + name.str() + ");\n") .Case("TypeSourceInfo *", - "AddTypeSourceInfo(" + std::string(name) + ");\n") - .Case("Expr *", "AddStmt(" + std::string(name) + ");\n") + "AddTypeSourceInfo(" + name.str() + ");\n") + .Case("Expr *", "AddStmt(" + name.str() + ");\n") .Case("IdentifierInfo *", - "AddIdentifierRef(" + std::string(name) + ");\n") - .Case("StringRef", "AddString(" + std::string(name) + ");\n") - .Case("ParamIdx", - "push_back(" + std::string(name) + ".serialize());\n") - .Case("OMPTraitInfo *", - "writeOMPTraitInfo(" + std::string(name) + ");\n") - .Default("push_back(" + std::string(name) + ");\n"); + "AddIdentifierRef(" + name.str() + ");\n") + .Case("StringRef", "AddString(" + name.str() + ");\n") + .Case("ParamIdx", "push_back(" + name.str() + ".serialize());\n") + .Case("OMPTraitInfo *", "writeOMPTraitInfo(" + name.str() + ");\n") + .Default("push_back(" + name.str() + ");\n"); } // Normalize attribute name by removing leading and trailing @@ -197,7 +193,7 @@ static ParsedAttrMap getParsedAttrList(const RecordKeeper &Records, std::string AN; if (Attr->isSubClassOf("TargetSpecificAttr") && !Attr->isValueUnset("ParseKind")) { - AN = std::string(Attr->getValueAsString("ParseKind")); + AN = Attr->getValueAsString("ParseKind").str(); // If this attribute has already been handled, it does not need to be // handled again. 
@@ -225,7 +221,7 @@ namespace { public: Argument(StringRef Arg, StringRef Attr) - : lowerName(std::string(Arg)), upperName(lowerName), attrName(Attr), + : lowerName(Arg.str()), upperName(lowerName), attrName(Attr), isOpt(false), Fake(false) { if (!lowerName.empty()) { lowerName[0] = std::tolower(lowerName[0]); @@ -331,8 +327,7 @@ namespace { void writePCHWrite(raw_ostream &OS) const override { OS << " " - << WritePCHRecord(type, - "SA->get" + std::string(getUpperName()) + "()"); + << WritePCHRecord(type, "SA->get" + getUpperName().str() + "()"); } std::string getIsOmitted() const override { @@ -698,12 +693,12 @@ namespace { VariadicArgument(const Record &Arg, StringRef Attr, std::string T) : Argument(Arg, Attr), Type(std::move(T)), ArgName(getLowerName().str() + "_"), ArgSizeName(ArgName + "Size"), - RangeName(std::string(getLowerName())) {} + RangeName(getLowerName().str()) {} VariadicArgument(StringRef Arg, StringRef Attr, std::string T) : Argument(Arg, Attr), Type(std::move(T)), ArgName(getLowerName().str() + "_"), ArgSizeName(ArgName + "Size"), - RangeName(std::string(getLowerName())) {} + RangeName(getLowerName().str()) {} const std::string &getType() const { return Type; } const std::string &getArgName() const { return ArgName; } @@ -792,8 +787,8 @@ namespace { // If we can't store the values in the current type (if it's something // like StringRef), store them in a different type and convert the // container afterwards. 
- std::string StorageType = std::string(getStorageType(getType())); - std::string StorageName = std::string(getLowerName()); + std::string StorageType = getStorageType(getType()).str(); + std::string StorageName = getLowerName().str(); if (StorageType != getType()) { StorageName += "Storage"; OS << " SmallVector<" << StorageType << ", 4> " @@ -1081,8 +1076,7 @@ namespace { public: VariadicEnumArgument(const Record &Arg, StringRef Attr) - : VariadicArgument(Arg, Attr, - std::string(Arg.getValueAsString("Type"))), + : VariadicArgument(Arg, Attr, Arg.getValueAsString("Type").str()), values(Arg.getValueAsListOfStrings("Values")), enums(Arg.getValueAsListOfStrings("Enums")), uniques(uniqueEnumsInOrder(enums)), @@ -1437,7 +1431,7 @@ namespace { void writePCHWrite(raw_ostream &OS) const override { OS << " " << WritePCHRecord(getType(), - "SA->get" + std::string(getUpperName()) + "Loc()"); + "SA->get" + getUpperName().str() + "Loc()"); } }; @@ -1766,11 +1760,10 @@ static void writeAttrAccessorDefinition(const Record &R, raw_ostream &OS) { static bool SpellingNamesAreCommon(const std::vector& Spellings) { assert(!Spellings.empty() && "An empty list of spellings was provided"); - std::string FirstName = - std::string(NormalizeNameForSpellingComparison(Spellings.front().name())); + StringRef FirstName = + NormalizeNameForSpellingComparison(Spellings.front().name()); for (const auto &Spelling : drop_begin(Spellings)) { - std::string Name = - std::string(NormalizeNameForSpellingComparison(Spelling.name())); + StringRef Name = NormalizeNameForSpellingComparison(Spelling.name()); if (Name != FirstName) return false; } @@ -1985,7 +1978,7 @@ struct AttributeSubjectMatchRule { } std::string getSpelling() const { - std::string Result = std::string(MetaSubject->getValueAsString("Name")); + std::string Result = MetaSubject->getValueAsString("Name").str(); if (isSubRule()) { Result += '('; if (isNegatedSubRule()) @@ -2728,7 +2721,7 @@ static void emitAttributes(const RecordKeeper 
&Records, raw_ostream &OS, for (const auto &[R, _] : reverse(Supers)) { if (R->getName() != "TargetSpecificAttr" && R->getName() != "DeclOrTypeAttr" && SuperName.empty()) - SuperName = std::string(R->getName()); + SuperName = R->getName().str(); if (R->getName() == "InheritableAttr") Inheritable = true; } @@ -4054,9 +4047,9 @@ static void emitArgInfo(const Record &R, raw_ostream &OS) { } static std::string GetDiagnosticSpelling(const Record &R) { - std::string Ret = std::string(R.getValueAsString("DiagSpelling")); + StringRef Ret = R.getValueAsString("DiagSpelling"); if (!Ret.empty()) - return Ret; + return Ret.str(); // If we couldn't find the DiagSpelling in this object, we can check to see // if the object is one that has a base, and if it is, loop up to the Base @@ -4089,7 +4082,7 @@ static std::string CalculateDiagnostic(const Record &S) { SmallVector Frags; SplitString(V, Frags, ","); for (auto Str : Frags) { - DiagList.push_back(std::string(Str.trim())); + DiagList.push_back(Str.trim().str()); } } } @@ -4120,7 +4113,7 @@ static std::string CalculateDiagnostic(const Record &S) { } static std::string GetSubjectWithSuffix(const Record *R) { - const std::string &B = std::string(R->getName()); + const std::string B = R->getName().str(); if (B == "DeclBase") return "Decl"; return B + "Decl"; @@ -5107,7 +5100,7 @@ GetAttributeHeadingAndSpellings(const Record &Documentation, "documented"); // Determine the heading to be used for this attribute. - std::string Heading = std::string(Documentation.getValueAsString("Heading")); + std::string Heading = Documentation.getValueAsString("Heading").str(); if (Heading.empty()) { // If there's only one spelling, we can simply use that. 
if (Spellings.size() == 1) @@ -5117,7 +5110,7 @@ GetAttributeHeadingAndSpellings(const Record &Documentation, for (auto I = Spellings.begin(), E = Spellings.end(); I != E; ++I) { std::string Spelling = - std::string(NormalizeNameForSpellingComparison(I->name())); + NormalizeNameForSpellingComparison(I->name()).str(); Uniques.insert(Spelling); } // If the semantic map has only one spelling, that is sufficient for our diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp index 1a2503dcf660cfb..45a97425ef920aa 100644 --- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp @@ -63,7 +63,7 @@ void clang::EmitClangCommentCommandInfo(const RecordKeeper &Records, std::vector Matches; for (size_t i = 0, e = Tags.size(); i != e; ++i) { const Record &Tag = *Tags[i]; - std::string Name = std::string(Tag.getValueAsString("Name")); + std::string Name = Tag.getValueAsString("Name").str(); std::string Return; raw_string_ostream(Return) << "return &Commands[" << i << "];"; Matches.emplace_back(std::move(Name), std::move(Return)); diff --git a/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp b/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp index bd75b3f6b652a16..2d615760814e01b 100644 --- a/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp @@ -51,7 +51,7 @@ void clang::EmitClangCommentHTMLNamedCharacterReferences( std::vector NameToUTF8; SmallString<32> CLiteral; for (const Record *Tag : Records.getAllDerivedDefinitions("NCR")) { - std::string Spelling = std::string(Tag->getValueAsString("Spelling")); + std::string Spelling = Tag->getValueAsString("Spelling").str(); uint64_t CodePoint = Tag->getValueAsInt("CodePoint"); CLiteral.clear(); CLiteral.append("return "); diff --git 
a/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp b/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp index a457315bc62c5c7..7d65cfe0d3f5292 100644 --- a/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp +++ b/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp @@ -24,7 +24,7 @@ void clang::EmitClangCommentHTMLTags(const RecordKeeper &Records, ArrayRef Tags = Records.getAllDerivedDefinitions("Tag"); std::vector Matches; for (const Record *Tag : Tags) { - Matches.emplace_back(std::string(Tag->getValueAsString("Spelling")), + Matches.emplace_back(Tag->getValueAsString("Spelling").str(), "return true;"); } @@ -42,7 +42,7 @@ void clang::EmitClangCommentHTMLTagsProperties(const RecordKeeper &Records, std::vector MatchesEndTagOptional; std::vector MatchesEndTagForbidden; for (const Record *Tag : Tags) { - std::string Spelling = std::string(Tag->getValueAsString("Spelling")); + std::string Spelling = Tag->getValueAsString("Spelling").str(); StringMatcher::StringPair Match(Spelling, "return true;"); if (Tag->getValueAsBit("EndTagOptional")) MatchesEndTagOptional.push_back(Match); From 7c554265ce0b94059f216dcab643055e98c8f439 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Mon, 28 Oct 2024 16:55:27 -0700 Subject: [PATCH 239/425] Revert "[rtsan] Intercept aligned_alloc on all versions of OSX if available on build machine (#112780)" (#113982) This reverts commit 97fb21ac1d6bc528b61a555356457ff2129dfde1. Due to issue brought up in #112780 > Unfortunately this breaks the build on our (automerger) bots, which have -mmacosx-version-min=10.13 and also -Werror=unguarded-availability-new . I was thinking about patching it via wrapping in __builtin_available check (which I believe is the right one to use, as it should match the -mmacosx-version-min ) - but can't actually think of a quick fix, due to interceptors being defined via C macros. 
> llvm-project/compiler-rt/lib/rtsan/rtsan_interceptors_posix.cpp:475:21: error: 'aligned_alloc' is only available on macOS 10.15 or newer [-Werror,-Wunguarded-availability-new] 475 | INTERCEPTOR(void *, aligned_alloc, SIZE_T alignment, SIZE_T size) { --- .../tests/rtsan_test_interceptors_posix.cpp | 17 +++++--------- .../sanitizer_platform_interceptors.h | 22 +------------------ 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp index 38274485c29f66a..6233c3e91800e10 100644 --- a/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp +++ b/compiler-rt/lib/rtsan/tests/rtsan_test_interceptors_posix.cpp @@ -122,20 +122,13 @@ TEST(TestRtsanInterceptors, VallocDiesWhenRealtime) { ExpectNonRealtimeSurvival(Func); } -#if __has_builtin(__builtin_available) && SANITIZER_APPLE -#define ALIGNED_ALLOC_AVAILABLE() (__builtin_available(macOS 10.15, *)) -#else -// We are going to assume this is true until we hit systems where it isn't -#define ALIGNED_ALLOC_AVAILABLE() (true) -#endif - +#if SANITIZER_INTERCEPT_ALIGNED_ALLOC TEST(TestRtsanInterceptors, AlignedAllocDiesWhenRealtime) { - if (ALIGNED_ALLOC_AVAILABLE()) { - auto Func = []() { EXPECT_NE(nullptr, aligned_alloc(16, 32)); }; - ExpectRealtimeDeath(Func, "aligned_alloc"); - ExpectNonRealtimeSurvival(Func); - } + auto Func = []() { EXPECT_NE(nullptr, aligned_alloc(16, 32)); }; + ExpectRealtimeDeath(Func, "aligned_alloc"); + ExpectNonRealtimeSurvival(Func); } +#endif // free_sized and free_aligned_sized (both C23) are not yet supported TEST(TestRtsanInterceptors, FreeDiesWhenRealtime) { diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index 3fd6b595ef197f8..6959a6d52d604e0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ 
b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -84,25 +84,6 @@ #define SI_NOT_MAC 1 #endif -#if SANITIZER_APPLE -# include - -// aligned_alloc was introduced in OSX 10.15 -// Linking will fail when using an older SDK -# if defined(__MAC_10_15) -// macOS 10.15 is greater than our minimal deployment target. To ensure we -// generate a weak reference so the dylib continues to work on older -// systems, we need to forward declare the intercepted function as "weak -// imports". -SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment, - __sanitizer::usize __size); -# define SI_MAC_SDK_10_15_AVAILABLE 1 -# else -# define SI_MAC_SDK_10_15_AVAILABLE 0 -# endif // defined(__MAC_10_15) - -#endif // SANITIZER_APPLE - #if SANITIZER_IOS #define SI_IOS 1 #else @@ -519,8 +500,7 @@ SANITIZER_WEAK_IMPORT void *aligned_alloc(__sanitizer::usize __alignment, #define SANITIZER_INTERCEPT_PVALLOC (SI_GLIBC || SI_ANDROID) #define SANITIZER_INTERCEPT_CFREE (SI_GLIBC && !SANITIZER_RISCV64) #define SANITIZER_INTERCEPT_REALLOCARRAY SI_POSIX -#define SANITIZER_INTERCEPT_ALIGNED_ALLOC \ - (!SI_MAC || SI_MAC_SDK_10_15_AVAILABLE) +#define SANITIZER_INTERCEPT_ALIGNED_ALLOC (!SI_MAC) #define SANITIZER_INTERCEPT_MALLOC_USABLE_SIZE (!SI_MAC && !SI_NETBSD) #define SANITIZER_INTERCEPT_MCHECK_MPROBE SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_WCSLEN 1 From b46a0482f9e4c0ee82b38da794b20f8f1a76f044 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 29 Oct 2024 00:56:03 +0100 Subject: [PATCH 240/425] [clang][bytecode] Implement __builtin_arithmetic_fence (#113937) --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 14 ++++++++++++++ clang/test/Sema/arithmetic-fence-builtin.c | 5 +++++ 2 files changed, 19 insertions(+) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 10e33c14f4b455b..b00d2a1768b6b71 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1670,6 
+1670,15 @@ static bool interp__builtin_operator_delete(InterpState &S, CodePtr OpPC, S, OpPC, *AllocForm, DynamicAllocator::Form::Operator, BlockDesc, Source); } +static bool interp__builtin_arithmetic_fence(InterpState &S, CodePtr OpPC, + const InterpFrame *Frame, + const Function *Func, + const CallExpr *Call) { + const Floating &Arg0 = S.Stk.peek(); + S.Stk.push(Arg0); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, const CallExpr *Call, uint32_t BuiltinID) { const InterpFrame *Frame = S.Current; @@ -2111,6 +2120,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, return false; break; + case Builtin::BI__arithmetic_fence: + if (!interp__builtin_arithmetic_fence(S, OpPC, Frame, F, Call)) + return false; + break; + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/test/Sema/arithmetic-fence-builtin.c b/clang/test/Sema/arithmetic-fence-builtin.c index a1941970edb53c0..55867ffb5e012cd 100644 --- a/clang/test/Sema/arithmetic-fence-builtin.c +++ b/clang/test/Sema/arithmetic-fence-builtin.c @@ -1,8 +1,13 @@ // RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - -verify -x c++ %s +// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - -verify -x c++ %s -fexperimental-new-constant-interpreter // RUN: %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -verify -x c++ %s +// RUN: %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -verify -x c++ %s -fexperimental-new-constant-interpreter // RUN: not %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -x c++ %s \ // RUN: -fprotect-parens 2>&1 | FileCheck -check-prefix=PPC %s +// RUN: not %clang_cc1 -triple ppc64le -DPPC -emit-llvm -o - -x c++ %s -fexperimental-new-constant-interpreter \ +// RUN: -fprotect-parens 2>&1 | FileCheck -check-prefix=PPC %s // RUN: %clang_cc1 -triple spir64 -emit-llvm -o - -verify -x c++ %s +// RUN: %clang_cc1 -triple spir64 -emit-llvm -o - -verify -x c++ %s 
-fexperimental-new-constant-interpreter #ifndef PPC int v; template T addT(T a, T b) { From 1549a0c183ee337a6de4c3933e10828808c6a094 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Tue, 29 Oct 2024 09:10:30 +0900 Subject: [PATCH 241/425] [mlir][SCF] Remove `scf-bufferize` pass (#113840) The dialect conversion-based bufferization passes have been migrated to One-Shot Bufferize about two years ago. To clean up the code base, this commit removes the `scf-bufferize` pass, one of the few remaining parts of the old infrastructure. Most bufferization passes have already been removed. Note for LLVM integration: If you depend on this pass, migrate to One-Shot Bufferize or copy the pass to your codebase. --- mlir/docs/Bufferization.md | 17 +------ .../mlir/Dialect/SCF/Transforms/Passes.h | 3 -- .../mlir/Dialect/SCF/Transforms/Passes.td | 7 --- .../BufferizableOpInterfaceImpl.cpp | 6 ++- mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp | 47 ------------------- .../lib/Dialect/SCF/Transforms/CMakeLists.txt | 1 - mlir/test/Dialect/SCF/bufferize.mlir | 34 +++++++++++--- 7 files changed, 32 insertions(+), 83 deletions(-) delete mode 100644 mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md index d5a426e09e7ceb1..7d38ebb38535c73 100644 --- a/mlir/docs/Bufferization.md +++ b/mlir/docs/Bufferization.md @@ -579,7 +579,6 @@ The code, slightly simplified and annotated, is reproduced here: // Partial bufferization passes. pm.addPass(createTensorConstantBufferizePass()); pm.addNestedPass(createTCPBufferizePass()); // Bufferizes the downstream `tcp` dialect. - pm.addNestedPass(createSCFBufferizePass()); pm.addNestedPass(createLinalgBufferizePass()); pm.addNestedPass(createTensorBufferizePass()); pm.addPass(createFuncBufferizePass()); @@ -596,7 +595,7 @@ must be module passes because they make changes to the top-level module. The bulk of the bufferization work is done by the function passes. 
Most of these passes are provided as part of the upstream MLIR distribution and bufferize -their respective dialects (e.g. `scf-bufferize` bufferizes the `scf` dialect). +their respective dialects (e.g. `abc-bufferize` bufferizes the `abc` dialect). The `tcp-bufferize` pass is an exception -- it is a partial bufferization pass used to bufferize the downstream `tcp` dialect, and fits in perfectly with all the other passes provided upstream. @@ -694,20 +693,6 @@ which helps with this in general. ### Other partial bufferization examples -- `scf-bufferize` - ([code](https://github.com/llvm/llvm-project/blob/bc8acf2ce8ad6e8c9b1d97b2e02d3f4ad26e1d9d/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp#L1), - [test](https://github.com/llvm/llvm-project/blob/bc8acf2ce8ad6e8c9b1d97b2e02d3f4ad26e1d9d/mlir/test/Dialect/SCF/bufferize.mlir#L1)) - - - Bufferizes ops from the `scf` dialect. - - This is an example of how to bufferize ops that implement - `RegionBranchOpInterface` (that is, they use regions to represent - control flow). - - The bulk of the work is done by - `lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp` - ([code](https://github.com/llvm/llvm-project/blob/daaaed6bb89044ac58a23f1bb1ccdd12342a5a58/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp#L1)), - which is well-commented and covers how to correctly convert ops that - contain regions. 
- - `func-bufferize` ([code](https://github.com/llvm/llvm-project/blob/2f5715dc78328215d51d5664c72c632a6dac1046/mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp#L1), [test](https://github.com/llvm/llvm-project/blob/2f5715dc78328215d51d5664c72c632a6dac1046/mlir/test/Dialect/Func/func-bufferize.mlir#L1)) diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h index fb8411418ff9a00..b70599df6f5033c 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h @@ -20,9 +20,6 @@ namespace mlir { #define GEN_PASS_DECL #include "mlir/Dialect/SCF/Transforms/Passes.h.inc" -/// Creates a pass that bufferizes the SCF dialect. -std::unique_ptr createSCFBufferizePass(); - /// Creates a pass that specializes for loop for unrolling and /// vectorization. std::unique_ptr createForLoopSpecializationPass(); diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td index 53d1ae10dc87d84..6e5ef96c450aa4a 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td @@ -11,13 +11,6 @@ include "mlir/Pass/PassBase.td" -def SCFBufferize : Pass<"scf-bufferize"> { - let summary = "Bufferize the scf dialect."; - let constructor = "mlir::createSCFBufferizePass()"; - let dependentDialects = ["bufferization::BufferizationDialect", - "memref::MemRefDialect"]; -} - // Note: Making these canonicalization patterns would require a dependency // of the SCF dialect on the Affine/Tensor/MemRef dialects or vice versa. 
def SCFForLoopCanonicalization diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp index cf40443ff383906..779c41a22e9ee2b 100644 --- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp @@ -649,7 +649,8 @@ struct ForOpInterface if (failed(bufferizableOp.resolveTensorOpOperandConflicts(rewriter, state))) return failure(); - if (!state.getOptions().enforceAliasingInvariants) + if (!state.getOptions().enforceAliasingInvariants || + state.getOptions().copyBeforeWrite) return success(); // According to the `getAliasing...` implementations, a bufferized OpResult @@ -889,7 +890,8 @@ struct WhileOpInterface if (failed(bufferizableOp.resolveTensorOpOperandConflicts(rewriter, state))) return failure(); - if (!state.getOptions().enforceAliasingInvariants) + if (!state.getOptions().enforceAliasingInvariants || + state.getOptions().copyBeforeWrite) return success(); // According to the `getAliasing...` implementations, a bufferized OpResult diff --git a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp deleted file mode 100644 index 21c618ab633f604..000000000000000 --- a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//===- Bufferize.cpp - scf bufferize pass ---------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/SCF/Transforms/Passes.h" - -#include "mlir/Dialect/Bufferization/IR/Bufferization.h" -#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/SCF/IR/SCF.h" -#include "mlir/Dialect/SCF/Transforms/Patterns.h" -#include "mlir/Transforms/DialectConversion.h" - -namespace mlir { -#define GEN_PASS_DEF_SCFBUFFERIZE -#include "mlir/Dialect/SCF/Transforms/Passes.h.inc" -} // namespace mlir - -using namespace mlir; -using namespace mlir::scf; - -namespace { -struct SCFBufferizePass : public impl::SCFBufferizeBase { - void runOnOperation() override { - auto *func = getOperation(); - auto *context = &getContext(); - - bufferization::BufferizeTypeConverter typeConverter; - RewritePatternSet patterns(context); - ConversionTarget target(*context); - - bufferization::populateBufferizeMaterializationLegality(target); - populateSCFStructuralTypeConversionsAndLegality(typeConverter, patterns, - target); - if (failed(applyPartialConversion(func, target, std::move(patterns)))) - return signalPassFailure(); - }; -}; -} // namespace - -std::unique_ptr mlir::createSCFBufferizePass() { - return std::make_unique(); -} diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt index 5dc7c60792b9b65..e99b5d0cc26fc7d 100644 --- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt @@ -1,7 +1,6 @@ add_mlir_dialect_library(MLIRSCFTransforms BufferDeallocationOpInterfaceImpl.cpp BufferizableOpInterfaceImpl.cpp - Bufferize.cpp ForallToFor.cpp ForallToParallel.cpp ForToWhile.cpp diff --git a/mlir/test/Dialect/SCF/bufferize.mlir b/mlir/test/Dialect/SCF/bufferize.mlir index ff1612310255a0d..53fcee692226cb7 100644 --- a/mlir/test/Dialect/SCF/bufferize.mlir +++ 
b/mlir/test/Dialect/SCF/bufferize.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -scf-bufferize | FileCheck %s +// RUN: mlir-opt %s -one-shot-bufferize="dialect-filter=scf,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s // CHECK-LABEL: func @if( // CHECK-SAME: %[[PRED:.*]]: i1, @@ -23,15 +23,21 @@ func.func @if(%pred: i1, %true_val: tensor, %false_val: tensor) -> return %0 : tensor } +// ----- + // CHECK-LABEL: func @for( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[LB:.*]]: index, %[[UB:.*]]: index, // CHECK-SAME: %[[STEP:.*]]: index) -> tensor { // CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref -// CHECK: %[[RESULT_MEMREF:.*]] = scf.for %[[VAL_6:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF]]) -> (memref) { +// Note: scf.for iter_args always bufferize to a memory write. This could be +// optimized by analyzing the loop body. +// CHECK: %[[MEMREF_COPY:.*]] = memref.alloc() +// CHECK: memref.copy %[[MEMREF]], %[[MEMREF_COPY]] +// CHECK: %[[RESULT_MEMREF:.*]] = scf.for %{{.*}} = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF_COPY]]) -> (memref) { // CHECK: scf.yield %[[ITER]] : memref // CHECK: } {some_attr} -// CHECK: %[[VAL_8:.*]] = bufferization.to_tensor %[[VAL_9:.*]] : memref +// CHECK: %[[VAL_8:.*]] = bufferization.to_tensor %[[RESULT_MEMREF]] : memref // CHECK: return %[[VAL_8]] : tensor // CHECK: } func.func @for(%arg0: tensor, %lb: index, %ub: index, %step: index) -> tensor { @@ -41,6 +47,8 @@ func.func @for(%arg0: tensor, %lb: index, %ub: index, %step: index) -> tens return %ret : tensor } +// ----- + // Check whether this converts at all. // // It would previously fail altogether. 
@@ -57,17 +65,23 @@ func.func @if_correct_recursive_legalization_behavior(%pred: i1, %tensor: tensor return %0 : tensor } +// ----- + // CHECK-LABEL: func @for_correct_recursive_legalization_behavior( // CHECK-SAME: %[[TENSOR:.*]]: tensor, // CHECK-SAME: %[[INDEX:.*]]: index) -> tensor { // CHECK: %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref -// CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF]]) -> (memref) { +// Note: scf.for iter_args always bufferize to a memory write. This could be +// optimized by analyzing the loop body. +// CHECK: %[[MEMREF_COPY:.*]] = memref.alloc() +// CHECK: memref.copy %[[MEMREF]], %[[MEMREF_COPY]] +// CHECK: %[[RESULT:.*]] = scf.for %{{.*}} = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF_COPY]]) -> (memref) { // CHECK: %[[TENSOR_ITER:.*]] = bufferization.to_tensor %[[MEMREF_ITER]] : memref // CHECK: %[[TENSOR_MUNGED:.*]] = "test.munge_tensor"(%[[TENSOR_ITER]]) : (tensor) -> tensor // CHECK: %[[MEMREF_MUNGED:.*]] = bufferization.to_memref %[[TENSOR_MUNGED]] : memref // CHECK: scf.yield %[[MEMREF_MUNGED]] : memref // CHECK: } -// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT:.*]] : memref +// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT]] : memref // CHECK: return %[[TENSOR]] : tensor // CHECK: } func.func @for_correct_recursive_legalization_behavior(%arg0: tensor, %index: index) -> tensor { @@ -78,11 +92,17 @@ func.func @for_correct_recursive_legalization_behavior(%arg0: tensor, %inde return %ret : tensor } +// ----- + // CHECK-LABEL: func @bufferize_while( // CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: tensor // CHECK: %[[M:.*]] = bufferization.to_memref %[[ARG2]] : memref -// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %{{.*}} = %[[M]]) : (i64, memref) -> (i64, i64, memref) -// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, 
memref +// Note: scf.while iter_args always bufferize to a memory write. This could be +// optimized by analyzing the loop body. +// CHECK: %[[MEMREF_COPY:.*]] = memref.alloc() +// CHECK: memref.copy %[[M]], %[[MEMREF_COPY]] +// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %[[ITER:.*]] = %[[MEMREF_COPY]]) : (i64, memref) -> (i64, i64, memref) +// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %[[ITER]] : i64, i64, memref // CHECK: ^bb0(%{{.*}}: i64, %{{.*}}: i64, %{{.*}}: memref): // CHECK: scf.yield %{{.*}}, %{{.*}} : i64, memref // CHECK: %[[RES2:.*]] = bufferization.to_tensor %[[RES1]]#2 : memref From 6233346895abfb57782511cddc263d439fdd537b Mon Sep 17 00:00:00 2001 From: Chengjun Date: Mon, 28 Oct 2024 17:22:48 -0700 Subject: [PATCH 242/425] [GenericCycle] Add a Cache for getExitBlocks in GenericCycle (#112290) In `UniformityAnalysis`, we need to get the exit blocks of cycles in the `DivergencePropagator` and currently, we have to do a search for the exit blocks every time. In this change, we add a cache of the results in the `GenericCycle` so that it can save the compile time. By testing, for some large cases, this can save about 60% compile time in the `UniformityAnalysis`. 
--- llvm/include/llvm/ADT/GenericCycleImpl.h | 9 +++++++++ llvm/include/llvm/ADT/GenericCycleInfo.h | 21 +++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h index 3d2c5f428835587..41ba8bf8fde14b3 100644 --- a/llvm/include/llvm/ADT/GenericCycleImpl.h +++ b/llvm/include/llvm/ADT/GenericCycleImpl.h @@ -47,6 +47,11 @@ bool GenericCycle::contains(const GenericCycle *C) const { template void GenericCycle::getExitBlocks( SmallVectorImpl &TmpStorage) const { + if (!ExitBlocksCache.empty()) { + TmpStorage = ExitBlocksCache; + return; + } + TmpStorage.clear(); size_t NumExitBlocks = 0; @@ -65,6 +70,7 @@ void GenericCycle::getExitBlocks( TmpStorage.resize(NumExitBlocks); } + ExitBlocksCache.append(TmpStorage.begin(), TmpStorage.end()); } template @@ -298,6 +304,8 @@ void GenericCycleInfo::moveTopLevelCycleToNewParent(CycleT *NewParent, for (auto &It : BlockMapTopLevel) if (It.second == Child) It.second = NewParent; + NewParent->clearCache(); + Child->clearCache(); } template @@ -316,6 +324,7 @@ void GenericCycleInfo::addBlockToCycle(BlockT *Block, CycleT *Cycle) { } BlockMapTopLevel.try_emplace(Block, Cycle); + Cycle->clearCache(); } /// \brief Main function of the cycle info computations. diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h index 8c2fa0490e638a3..b8b6e3e9967a4a3 100644 --- a/llvm/include/llvm/ADT/GenericCycleInfo.h +++ b/llvm/include/llvm/ADT/GenericCycleInfo.h @@ -74,16 +74,27 @@ template class GenericCycle { /// always have the same depth. 
unsigned Depth = 0; + /// Cache for the results of getExitBlocks + mutable SmallVector ExitBlocksCache; + void clear() { Entries.clear(); Children.clear(); Blocks.clear(); Depth = 0; ParentCycle = nullptr; + clearCache(); + } + + void appendEntry(BlockT *Block) { + Entries.push_back(Block); + clearCache(); } - void appendEntry(BlockT *Block) { Entries.push_back(Block); } - void appendBlock(BlockT *Block) { Blocks.insert(Block); } + void appendBlock(BlockT *Block) { + Blocks.insert(Block); + clearCache(); + } GenericCycle(const GenericCycle &) = delete; GenericCycle &operator=(const GenericCycle &) = delete; @@ -102,6 +113,11 @@ template class GenericCycle { return Entries; } + /// Clear the cache of the cycle. + /// This should be run in all non-const functions in GenericCycle + /// and GenericCycleInfo. + void clearCache() const { ExitBlocksCache.clear(); } + /// \brief Return whether \p Block is an entry block of the cycle. bool isEntry(const BlockT *Block) const { return is_contained(Entries, Block); @@ -112,6 +128,7 @@ template class GenericCycle { assert(contains(Block)); Entries.clear(); Entries.push_back(Block); + clearCache(); } /// \brief Return whether \p Block is contained in the cycle. From 61353cc1f65f02477eedeebcb08e9193cbd53305 Mon Sep 17 00:00:00 2001 From: Eisuke Kawashima Date: Tue, 29 Oct 2024 09:32:36 +0900 Subject: [PATCH 243/425] [compiler-rt] Fix invalid escape sequences in python files (#94030) \d, \( and \) are not valid escape sequences; since python 3.12 they give SyntaxWarning, and will raise SyntaxError in future. https://docs.python.org/3.12/whatsnew/3.12.html#other-language-changes r"\(\d\)" and "\\(\\d\\)" are equivalent but the former is the shorter.
Co-authored-by: Eisuke Kawashima --- compiler-rt/lib/asan/scripts/asan_symbolize.py | 4 ++-- compiler-rt/lib/hwasan/scripts/hwasan_symbolize | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py index b08769614aeb18f..058a1614b55e6a7 100755 --- a/compiler-rt/lib/asan/scripts/asan_symbolize.py +++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py @@ -316,7 +316,7 @@ def symbolize(self, addr, binary, offset): # * For C functions atos omits parentheses and argument types. # * For C++ functions the function name (i.e., `foo` above) may contain # templates which may contain parentheses. - match = re.match("^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line) + match = re.match(r"^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line) logging.debug("atos_line: %s", atos_line) if match: function_name = match.group(1) @@ -541,7 +541,7 @@ def process_line_posix(self, line): # names in the regex because it could be an # Objective-C or C++ demangled name. stack_trace_line_format = ( - "^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)" + r"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? 
*\((.*)\+(0x[0-9a-f]+)\)" ) match = re.match(stack_trace_line_format, line) if not match: diff --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize index d1b2857ccd8156f..efca6b82809b970 100755 --- a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize +++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize @@ -316,7 +316,7 @@ class Symbolizer: self.__last_access_tag = int(match.group(2), 16) def process_tag_dump_line(self, line, ignore_tags=False): - m = re.match(r'.*?(0x[0-9a-f]+):' + '([ ]*[\[ ][0-9a-f][0-9a-f]\]?)' * 16, line) + m = re.match(r'.*?(0x[0-9a-f]+):' + r'([ ]*[\[ ][0-9a-f][0-9a-f]\]?)' * 16, line) if m is None: return False addr = m.group(1) From 1ceccbb0dd9d8539fec2213566fe6cc2a05b7993 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 28 Oct 2024 17:33:53 -0700 Subject: [PATCH 244/425] VirtRegRewriter: Add implicit register defs for live out undef lanes (#112679) If an undef subregister def is live into another block, we need to maintain a physreg def to track the liveness of those lanes. This would manifest a verifier error after branch folding, when the cloned tail block use no longer had a def. We need to detect interference with other assigned intervals to avoid clobbering the undef lanes defined in other intervals, since the undef def didn't count as interference. This is pretty ugly and adds a new dependency on LiveRegMatrix, keeping it live for one more pass. It also adds a lot of implicit operand spam (we really should have a better representation for this). There is a missing verifier check for this situation. Added an xfailed test that demonstrates this. We may also be able to revert the changes in 47d3cbcf842a036c20c3f1c74255cdfc213f41c2. It might be better to insert an IMPLICIT_DEF before the instruction rather than using the implicit-def operand. 
Fixes #98474 --- llvm/include/llvm/CodeGen/LiveRegMatrix.h | 10 + llvm/lib/CodeGen/LiveRegMatrix.cpp | 35 ++ llvm/lib/CodeGen/VirtRegMap.cpp | 68 ++++ .../branch-folding-implicit-def-subreg.ll | 65 ++-- ...nfloop-subrange-spill-inspect-subrange.mir | 4 +- .../CodeGen/AMDGPU/infloop-subrange-spill.mir | 4 +- ...sue98474-assigned-physreg-interference.mir | 55 +++ ...474-need-live-out-undef-subregister-def.ll | 42 ++ ...egrewriter-live-out-undef-subregisters.mir | 363 ++++++++++++++++++ ...ssing-def-liveout-physical-subregister.mir | 36 ++ 10 files changed, 643 insertions(+), 39 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir create mode 100644 llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll create mode 100644 llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir create mode 100644 llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h index 84050bf17073776..486392ca3c49d5f 100644 --- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h +++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h @@ -118,6 +118,16 @@ class LiveRegMatrix { /// the segment [Start, End). bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg); + /// Check for interference in the segment [Start, End) that may prevent + /// assignment to PhysReg, like checkInterference. Returns a lane mask of + /// which lanes of the physical register interfere in the segment [Start, End) + /// of some other interval already assigned to PhysReg. + /// + /// If this function returns LaneBitmask::getNone(), PhysReg is completely + /// free at the segment [Start, End). + LaneBitmask checkInterferenceLanes(SlotIndex Start, SlotIndex End, + MCRegister PhysReg); + /// Assign VirtReg to PhysReg. 
/// This will mark VirtReg's live range as occupied in the LiveRegMatrix and /// update VirtRegMap. The live range is expected to be available in PhysReg. diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index a57233cf37da014..bc8c59381a40e17 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -244,6 +244,41 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End, return false; } +LaneBitmask LiveRegMatrix::checkInterferenceLanes(SlotIndex Start, + SlotIndex End, + MCRegister PhysReg) { + // Construct artificial live range containing only one segment [Start, End). + VNInfo valno(0, Start); + LiveRange::Segment Seg(Start, End, &valno); + LiveRange LR; + LR.addSegment(Seg); + + LaneBitmask InterferingLanes; + + // Check for interference with that segment + for (MCRegUnitMaskIterator MCRU(PhysReg, TRI); MCRU.isValid(); ++MCRU) { + auto [Unit, Lanes] = *MCRU; + // LR is stack-allocated. LiveRegMatrix caches queries by a key that + // includes the address of the live range. If (for the same reg unit) this + // checkInterference overload is called twice, without any other query() + // calls in between (on heap-allocated LiveRanges) - which would invalidate + // the cached query - the LR address seen the second time may well be the + // same as that seen the first time, while the Start/End/valno may not - yet + // the same cached result would be fetched. To avoid that, we don't cache + // this query. + // + // FIXME: the usability of the Query API needs to be improved to avoid + // subtle bugs due to query identity. Avoiding caching, for example, would + // greatly simplify things. 
+ LiveIntervalUnion::Query Q; + Q.reset(UserTag, LR, Matrix[Unit]); + if (Q.checkInterference()) + InterferingLanes |= Lanes; + } + + return InterferingLanes; +} + Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const { const LiveInterval *VRegInterval = nullptr; for (MCRegUnit Unit : TRI->regunits(PhysReg)) { diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index 46253b1743d97e0..26a12512c87be07 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/LiveDebugVariables.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStacks.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -203,6 +204,7 @@ class VirtRegRewriter : public MachineFunctionPass { MachineRegisterInfo *MRI = nullptr; SlotIndexes *Indexes = nullptr; LiveIntervals *LIS = nullptr; + LiveRegMatrix *LRM = nullptr; VirtRegMap *VRM = nullptr; LiveDebugVariables *DebugVars = nullptr; DenseSet RewriteRegs; @@ -215,6 +217,9 @@ class VirtRegRewriter : public MachineFunctionPass { void handleIdentityCopy(MachineInstr &MI); void expandCopyBundle(MachineInstr &MI) const; bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const; + LaneBitmask liveOutUndefPhiLanesForUndefSubregDef( + const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg, + MCPhysReg PhysReg, const MachineInstr &MI) const; public: static char ID; @@ -247,6 +252,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy) INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", @@ 
-262,6 +268,7 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addRequired(); if (!ClearVirtRegs) AU.addPreserved(); @@ -276,6 +283,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { MRI = &MF->getRegInfo(); Indexes = &getAnalysis().getSI(); LIS = &getAnalysis().getLIS(); + LRM = &getAnalysis().getLRM(); VRM = &getAnalysis().getVRM(); DebugVars = &getAnalysis(); LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" @@ -548,6 +556,40 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI, return false; } +/// Compute a lanemask for undef lanes which need to be preserved out of the +/// defining block for a register assignment for a subregister def. \p PhysReg +/// is assigned to \p LI, which is the main range. +LaneBitmask VirtRegRewriter::liveOutUndefPhiLanesForUndefSubregDef( + const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg, + MCPhysReg PhysReg, const MachineInstr &MI) const { + LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg); + LaneBitmask LiveOutUndefLanes; + + for (const LiveInterval::SubRange &SR : LI.subranges()) { + // Figure out which lanes are undef live into a successor. 
+ LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask; + if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) { + for (const MachineBasicBlock *Succ : MBB.successors()) { + if (LIS->isLiveInToMBB(SR, Succ)) + LiveOutUndefLanes |= NeedImpDefLanes; + } + } + } + + SlotIndex MIIndex = LIS->getInstructionIndex(MI); + SlotIndex BeforeMIUses = MIIndex.getBaseIndex(); + LaneBitmask InterferingLanes = + LRM->checkInterferenceLanes(BeforeMIUses, MIIndex.getRegSlot(), PhysReg); + LiveOutUndefLanes &= ~InterferingLanes; + + LLVM_DEBUG(if (LiveOutUndefLanes.any()) { + dbgs() << "Need live out undef defs for " << printReg(PhysReg) + << LiveOutUndefLanes << " from " << printMBBReference(MBB) << '\n'; + }); + + return LiveOutUndefLanes; +} + void VirtRegRewriter::rewrite() { bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); SmallVector SuperDeads; @@ -602,6 +644,32 @@ void VirtRegRewriter::rewrite() { MO.setIsUndef(true); } else if (!MO.isDead()) { assert(MO.isDef()); + if (MO.isUndef()) { + const LiveInterval &LI = LIS->getInterval(VirtReg); + + LaneBitmask LiveOutUndefLanes = + liveOutUndefPhiLanesForUndefSubregDef(LI, *MBBI, SubReg, + PhysReg, MI); + if (LiveOutUndefLanes.any()) { + SmallVector CoveringIndexes; + + // TODO: Just use one super register def if none of the lanes + // are needed? + if (!TRI->getCoveringSubRegIndexes( + *MRI, MRI->getRegClass(VirtReg), LiveOutUndefLanes, + CoveringIndexes)) + llvm_unreachable( + "cannot represent required subregister defs"); + + // Try to represent the minimum needed live out def as a + // sequence of subregister defs. + // + // FIXME: It would be better if we could directly represent + // liveness with a lanemask instead of spamming operands. 
+ for (unsigned SubIdx : CoveringIndexes) + SuperDefs.push_back(TRI->getSubReg(PhysReg, SubIdx)); + } + } } } diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index 862543299239717..055e9850de3d68a 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -38,24 +38,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc - ; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr10 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr18 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr20 = IMPLICIT_DEF ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.59, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20 + ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF - ; 
GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 + ; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18 + ; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20 + ; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22 + ; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24 ; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3.Flow17: @@ -111,8 +106,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.6.Flow20: @@ -395,8 +390,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.37, implicit $exec @@ -434,8 +429,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; 
GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.39, implicit $exec @@ -484,8 +479,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.41, implicit $exec @@ -535,8 +530,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.47, implicit $exec @@ -589,8 +584,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) 
%arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} @@ -643,8 +638,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.43, implicit $exec @@ -689,8 +684,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: S_BRANCH %bb.45 ; GFX90A-NEXT: {{ $}} @@ -719,8 +714,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = 
IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: S_BRANCH %bb.46 ; GFX90A-NEXT: {{ $}} @@ -748,8 +743,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: S_BRANCH %bb.62 ; GFX90A-NEXT: {{ $}} @@ -773,8 +768,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF ; GFX90A-NEXT: $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.53, implicit $exec @@ -880,8 +875,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF - ; 
GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF + ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 + ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.57, implicit $exec ; GFX90A-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir index 6603f2ef7adef71..7421a2e10c3b572 100644 --- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir @@ -32,7 +32,7 @@ body: | ; CHECK-NEXT: dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) ; CHECK-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25 ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) ; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.6, implicit $exec @@ -82,7 +82,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE 
%stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24 + ; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24 ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc ; CHECK-NEXT: S_BRANCH %bb.6 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir index fa95f4c13417429..8ae6c279558961c 100644 --- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir @@ -30,7 +30,7 @@ body: | ; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF ; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25 ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) ; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec @@ -78,7 +78,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX8_IMM undef renamable $sgpr4_sgpr5, 32, 0 :: (invariant load (s256), addrspace 4) ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable 
$sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24 + ; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24 ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir new file mode 100644 index 000000000000000..786ce402038369d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir @@ -0,0 +1,55 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s + +--- +name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr0, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr4_vgpr5 + ; CHECK-NEXT: EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec + ; 
CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: liveins: $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: renamable $vgpr3_vgpr4_vgpr5 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8) + ; CHECK-NEXT: EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0, $vgpr2 + + %2:vgpr_32 = COPY $vgpr2 + S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit killed $scc + + bb.1: + undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec + S_BRANCH %bb.3 + + bb.2: + S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8) + + bb.3: + EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, %2:vgpr_32, 0, 0, 0, implicit $exec + S_ENDPGM 0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll new file mode 100644 index 000000000000000..7caa563d8b29830 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll @@ -0,0 +1,42 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s + +; Check for verifier error after tail duplication. An implicit_def of +; a subregister is needed to maintain liveness after assignment. 
+ +define amdgpu_vs void @test(i32 inreg %cmp, i32 %e0) { +; CHECK-LABEL: test: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_mov_b32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 +; CHECK-NEXT: ; %bb.1: ; %load +; CHECK-NEXT: s_mov_b32 s1, s0 +; CHECK-NEXT: s_mov_b32 s2, s0 +; CHECK-NEXT: s_mov_b32 s3, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: buffer_load_format_xy v[1:2], v1, s[0:3], 0 idxen +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: exp mrt0 v0, v1, v2, v0 +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: exp mrt0 v0, v1, v2, v0 +; CHECK-NEXT: s_endpgm +entry: + %cond = icmp eq i32 %cmp, 0 + br i1 %cond, label %end, label %load + +load: + %data1 = call <2 x i32> @llvm.amdgcn.struct.buffer.load.format.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0) + %e1 = extractelement <2 x i32> %data1, i32 0 + %e2 = extractelement <2 x i32> %data1, i32 1 + br label %end + +end: + %out1 = phi i32 [ 0, %entry ], [ %e1, %load ] + %out2 = phi i32 [ poison, %entry ], [ %e2, %load ] + call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 %e0, i32 %out1, i32 %out2, i32 %e0, i1 false, i1 false) + ret void +} + diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir new file mode 100644 index 000000000000000..86b6c5982b4cbd8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir @@ -0,0 +1,363 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s + +# The partial def of %0 introduces a live out undef def of %0.sub1 +# into bb.3. We need to maintain this liveness with an explicit def of +# the physical subregister. 
Without this, a verifier error would +# appear after tail duplication. + +--- +name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1 + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8) + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0 + + S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit killed $scc + + bb.1: + undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + S_BRANCH %bb.3 + + bb.2: + S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8) + + bb.3: + EXP 0, killed %0.sub0, killed %0.sub1, undef 
%2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec + S_ENDPGM 0 + +... + +--- +name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2 + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8) + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0 + + S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit killed $scc + + bb.1: + undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec + S_BRANCH %bb.3 + + bb.2: + S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8) + + bb.3: + EXP 0, killed %0.sub0, 
killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec + S_ENDPGM 0 + +... + +--- +name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr2, implicit-def $vgpr0 + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8) + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0 + + S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit killed $scc + + bb.1: + undef %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec + S_BRANCH %bb.3 + + bb.2: + S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8) + + 
bb.3: + EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec + S_ENDPGM 0 + +... + +# Test another use of the value before the block end. +--- +name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1 + ; CHECK-NEXT: S_NOP 0, implicit renamable $vgpr0_vgpr1 + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8) + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0 + + S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit killed $scc + + bb.1: + undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + S_NOP 0, implicit %0 + S_BRANCH %bb.3 + + bb.2: + S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr_32 = V_MOV_B32_e32 
0, implicit $exec + %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8) + + bb.3: + EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec + S_ENDPGM 0 + +... + +# The undef subregister is not live out, no implicit def should be added for it +--- +name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +body: | + ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $sgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8) + ; CHECK-NEXT: EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $sgpr0 + + S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit killed $scc + + bb.1: + undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec + S_BRANCH %bb.3 + + bb.2: 
+ S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 + %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8) + + bb.3: + EXP 0, killed %0.sub0, killed %0.sub0, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec + S_ENDPGM 0 + +... + +# In bb.2, %0 should be assigned to vgpr0_vgpr1. Make sure the value +# copied from $vgpr0 into %3 isn't clobbered by the undef phi def for +# %0.sub1. +--- +name: assigned_physreg_subregister_interference +tracksRegLiveness: true +frameInfo: + adjustsStack: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + wwmReservedRegs: + - '$vgpr63' +body: | + ; CHECK-LABEL: name: assigned_physreg_subregister_interference + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40 + ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40 + ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr34, 2, $vgpr40 + ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr35, 3, $vgpr40 + ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr36, 4, $vgpr40 + ; CHECK-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr37, 5, $vgpr40 + ; CHECK-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, 
$vgpr0_vgpr1:0x000000000000000F + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec + ; CHECK-NEXT: renamable $sgpr5 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec + ; CHECK-NEXT: renamable $vcc = V_CMP_EQ_U64_e64 $sgpr4_sgpr5, killed $vgpr0_vgpr1, implicit $exec + ; CHECK-NEXT: renamable $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 killed renamable $vcc, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: dead $sgpr30_sgpr31 = noconvergent SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + ; CHECK-NEXT: renamable $vgpr1 = COPY $vgpr0, implicit $exec + ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 123, implicit $exec + ; CHECK-NEXT: $exec = S_XOR_B64 $exec, renamable $sgpr36_sgpr37, implicit-def dead $scc + ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: liveins: $vgpr1, $sgpr34_sgpr35 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = COPY renamable $sgpr34_sgpr35 + ; CHECK-NEXT: renamable $vgpr0 = V_ADD_U32_e32 1, killed $vgpr1, implicit $exec + ; CHECK-NEXT: $sgpr37 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 5 + ; CHECK-NEXT: $sgpr36 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 4 + ; CHECK-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 3 + ; CHECK-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 2 + ; CHECK-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 1 + ; CHECK-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0 + ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + bb.0: + liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1, $vgpr63 + + $vgpr63 = 
SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr63 + $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr63 + $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr34, 2, $vgpr63 + $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr35, 3, $vgpr63 + $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr36, 4, $vgpr63 + $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr37, 5, $vgpr63 + undef %0.sub0:vreg_64 = COPY $vgpr0 + %0.sub1:vreg_64 = COPY $vgpr1 + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + renamable $sgpr34_sgpr35 = S_MOV_B64 $exec + + bb.1: + liveins: $vgpr63, $sgpr34_sgpr35 + + renamable $sgpr4 = V_READFIRSTLANE_B32 %0.sub0, implicit $exec + renamable $sgpr5 = V_READFIRSTLANE_B32 %0.sub1, implicit $exec + renamable $vcc = V_CMP_EQ_U64_e64 $sgpr4_sgpr5, %0, implicit $exec + renamable $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 killed renamable $vcc, implicit-def $exec, implicit-def dead $scc, implicit $exec + + bb.2: + liveins: $vgpr63, $sgpr4_sgpr5:0x000000000000000F, $sgpr34_sgpr35, $sgpr36_sgpr37 + + dead $sgpr30_sgpr31 = noconvergent SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 + %3:vgpr_32 = COPY $vgpr0 + undef %0.sub0:vreg_64 = V_MOV_B32_e32 123, implicit $exec + $exec = S_XOR_B64_term $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc + S_CBRANCH_EXECNZ %bb.1, implicit $exec + + bb.3: + liveins: $vgpr63, $sgpr34_sgpr35 + + $exec = S_MOV_B64_term killed renamable $sgpr34_sgpr35 + + bb.4: + liveins: $vgpr63 + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + %6:vgpr_32 = V_ADD_U32_e32 1, %3, implicit $exec + $vgpr0 = COPY %6 + $sgpr37 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 5 + $sgpr36 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 4 + $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 3 + $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2 + $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1 + $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0 + SI_RETURN implicit $vgpr0 + +... 
diff --git a/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir b/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir new file mode 100644 index 000000000000000..892a4298bbdb518 --- /dev/null +++ b/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir @@ -0,0 +1,36 @@ +# XFAIL: * +# RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=none -filetype=null %s + +# FIXME: This should fail the machine verifier. There is a missing def +# of $vgpr2 in bb.1, which is needed since it's live into bb.3 + +--- +name: missing_live_out_subreg_def +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc + S_CBRANCH_SCC0 %bb.2, implicit killed $scc + + bb.1: + liveins: $vgpr0 + + renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec + S_BRANCH %bb.3 + + bb.2: + liveins: $vgpr0 + + renamable $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF + renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec + renamable $vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8) + + bb.3: + liveins: $vgpr0, $vgpr1_vgpr2 + + EXP 0, killed renamable $vgpr0, killed renamable $vgpr1, renamable $vgpr2, renamable $vgpr0, 0, 0, 0, implicit $exec + S_ENDPGM 0 + +... From a461869db3bdc372203c9a7b8326d66a626f80d9 Mon Sep 17 00:00:00 2001 From: vporpo Date: Mon, 28 Oct 2024 18:00:52 -0700 Subject: [PATCH 245/425] [SandboxIR][Pass] Implement Analyses class (#113962) The Analyses class provides a way to pass around commonly used Analyses to SandboxIR passes through `runOnFunction()` and `runOnRegion()` functions. 
--- llvm/include/llvm/SandboxIR/Pass.h | 27 ++++++++++++++--- llvm/include/llvm/SandboxIR/PassManager.h | 4 +-- .../SandboxVectorizer/Passes/BottomUpVec.h | 2 +- .../SandboxVectorizer/Passes/NullPass.h | 2 +- .../Passes/PrintInstructionCount.h | 2 +- .../Passes/RegionsFromMetadata.h | 2 +- .../SandboxVectorizer/SandboxVectorizer.h | 2 ++ llvm/lib/SandboxIR/PassManager.cpp | 8 ++--- .../SandboxVectorizer/Passes/BottomUpVec.cpp | 2 +- .../Passes/RegionsFromMetadata.cpp | 4 +-- .../SandboxVectorizer/SandboxVectorizer.cpp | 4 ++- llvm/unittests/SandboxIR/PassTest.cpp | 30 +++++++++---------- 12 files changed, 56 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/SandboxIR/Pass.h b/llvm/include/llvm/SandboxIR/Pass.h index 5ed9d7442ee70cd..fee6bd9e779fda6 100644 --- a/llvm/include/llvm/SandboxIR/Pass.h +++ b/llvm/include/llvm/SandboxIR/Pass.h @@ -12,11 +12,29 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -namespace llvm::sandboxir { +namespace llvm { + +class ScalarEvolution; + +namespace sandboxir { class Function; class Region; +class Analyses { + ScalarEvolution *SE = nullptr; + + Analyses() = default; + +public: + Analyses(ScalarEvolution &SE) : SE(&SE) {} + +public: + ScalarEvolution &getScalarEvolution() const { return *SE; } + /// For use by unit tests. + static Analyses emptyForTesting() { return Analyses(); } +}; + /// The base class of a Sandbox IR Pass. class Pass { protected: @@ -52,7 +70,7 @@ class FunctionPass : public Pass { /// \p Name can't contain any spaces or start with '-'. FunctionPass(StringRef Name) : Pass(Name) {} /// \Returns true if it modifies \p F. - virtual bool runOnFunction(Function &F) = 0; + virtual bool runOnFunction(Function &F, const Analyses &A) = 0; }; /// A pass that runs on a sandbox::Region. @@ -61,9 +79,10 @@ class RegionPass : public Pass { /// \p Name can't contain any spaces or start with '-'. RegionPass(StringRef Name) : Pass(Name) {} /// \Returns true if it modifies \p R. 
- virtual bool runOnRegion(Region &R) = 0; + virtual bool runOnRegion(Region &R, const Analyses &A) = 0; }; -} // namespace llvm::sandboxir +} // namespace sandboxir +} // namespace llvm #endif // LLVM_SANDBOXIR_PASS_H diff --git a/llvm/include/llvm/SandboxIR/PassManager.h b/llvm/include/llvm/SandboxIR/PassManager.h index e8221996bc8f049..77154cc71434546 100644 --- a/llvm/include/llvm/SandboxIR/PassManager.h +++ b/llvm/include/llvm/SandboxIR/PassManager.h @@ -208,7 +208,7 @@ class FunctionPassManager final FunctionPassManager(StringRef Name, StringRef Pipeline, CreatePassFunc CreatePass) : PassManager(Name, Pipeline, CreatePass) {} - bool runOnFunction(Function &F) final; + bool runOnFunction(Function &F, const Analyses &A) final; }; class RegionPassManager final : public PassManager { @@ -217,7 +217,7 @@ class RegionPassManager final : public PassManager { RegionPassManager(StringRef Name, StringRef Pipeline, CreatePassFunc CreatePass) : PassManager(Name, Pipeline, CreatePass) {} - bool runOnRegion(Region &R) final; + bool runOnRegion(Region &R, const Analyses &A) final; }; } // namespace llvm::sandboxir diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h index 5cd47efd6b34620..2b0b3f8192c0482 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h @@ -33,7 +33,7 @@ class BottomUpVec final : public FunctionPass { public: BottomUpVec(StringRef Pipeline); - bool runOnFunction(Function &F) final; + bool runOnFunction(Function &F, const Analyses &A) final; void printPipeline(raw_ostream &OS) const final { OS << getName() << "\n"; RPM.printPipeline(OS); diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h index 
75b9f42520156ce..1025379770bac07 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h @@ -11,7 +11,7 @@ class Region; class NullPass final : public RegionPass { public: NullPass() : RegionPass("null") {} - bool runOnRegion(Region &R) final { return false; } + bool runOnRegion(Region &R, const Analyses &A) final { return false; } }; } // namespace llvm::sandboxir diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h index 9d88bc828038479..cd11d4c1489268b 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h @@ -12,7 +12,7 @@ namespace llvm::sandboxir { class PrintInstructionCount final : public RegionPass { public: PrintInstructionCount() : RegionPass("null") {} - bool runOnRegion(Region &R) final { + bool runOnRegion(Region &R, const Analyses &A) final { outs() << "InstructionCount: " << std::distance(R.begin(), R.end()) << "\n"; return false; } diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h index 3d82a61c90153aa..3d738ac8917effd 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h @@ -26,7 +26,7 @@ class RegionsFromMetadata final : public FunctionPass { public: RegionsFromMetadata(StringRef Pipeline); - bool runOnFunction(Function &F) final; + bool runOnFunction(Function &F, const Analyses &A) final; void printPipeline(raw_ostream &OS) const final { OS << getName() << "\n"; RPM.printPipeline(OS); diff 
--git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h index b83744cf9e6cb68..03867df3d980845 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h @@ -10,6 +10,7 @@ #include +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/PassManager.h" #include "llvm/SandboxIR/PassManager.h" @@ -19,6 +20,7 @@ class TargetTransformInfo; class SandboxVectorizerPass : public PassInfoMixin { TargetTransformInfo *TTI = nullptr; + ScalarEvolution *SE = nullptr; // A pipeline of SandboxIR function passes run by the vectorizer. sandboxir::FunctionPassManager FPM; diff --git a/llvm/lib/SandboxIR/PassManager.cpp b/llvm/lib/SandboxIR/PassManager.cpp index 3a1cfa1d367a2aa..aaa49e0f6912b61 100644 --- a/llvm/lib/SandboxIR/PassManager.cpp +++ b/llvm/lib/SandboxIR/PassManager.cpp @@ -10,20 +10,20 @@ namespace llvm::sandboxir { -bool FunctionPassManager::runOnFunction(Function &F) { +bool FunctionPassManager::runOnFunction(Function &F, const Analyses &A) { bool Change = false; for (auto &Pass : Passes) { - Change |= Pass->runOnFunction(F); + Change |= Pass->runOnFunction(F, A); // TODO: run the verifier. } // TODO: Check ChangeAll against hashes before/after. return Change; } -bool RegionPassManager::runOnRegion(Region &R) { +bool RegionPassManager::runOnRegion(Region &R, const Analyses &A) { bool Change = false; for (auto &Pass : Passes) { - Change |= Pass->runOnRegion(R); + Change |= Pass->runOnRegion(R, A); // TODO: run the verifier. } // TODO: Check ChangeAll against hashes before/after. 
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp index ede41cd661b559a..66d631edfc4076f 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp @@ -59,7 +59,7 @@ void BottomUpVec::vectorizeRec(ArrayRef Bndl) { void BottomUpVec::tryVectorize(ArrayRef Bndl) { vectorizeRec(Bndl); } -bool BottomUpVec::runOnFunction(Function &F) { +bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) { Change = false; // TODO: Start from innermost BBs first for (auto &BB : F) { diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.cpp index 5887d5e8bc2683c..8e3f5b77429c5a0 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.cpp @@ -17,11 +17,11 @@ RegionsFromMetadata::RegionsFromMetadata(StringRef Pipeline) : FunctionPass("regions-from-metadata"), RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {} -bool RegionsFromMetadata::runOnFunction(Function &F) { +bool RegionsFromMetadata::runOnFunction(Function &F, const Analyses &A) { SmallVector> Regions = sandboxir::Region::createRegionsFromMD(F); for (auto &R : Regions) { - RPM.runOnRegion(*R); + RPM.runOnRegion(*R, A); } return false; } diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp index c68f9482e337dd5..96d825ed852fb22 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp @@ -51,6 +51,7 @@ SandboxVectorizerPass::~SandboxVectorizerPass() = default; 
PreservedAnalyses SandboxVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) { TTI = &AM.getResult(F); + SE = &AM.getResult(F); bool Changed = runImpl(F); if (!Changed) @@ -82,5 +83,6 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) { // Create SandboxIR for LLVMF and run BottomUpVec on it. sandboxir::Context Ctx(LLVMF.getContext()); sandboxir::Function &F = *Ctx.createFunction(&LLVMF); - return FPM.runOnFunction(F); + sandboxir::Analyses A(*SE); + return FPM.runOnFunction(F, A); } diff --git a/llvm/unittests/SandboxIR/PassTest.cpp b/llvm/unittests/SandboxIR/PassTest.cpp index 866bd8233d8035d..751aedefd8fe2d5 100644 --- a/llvm/unittests/SandboxIR/PassTest.cpp +++ b/llvm/unittests/SandboxIR/PassTest.cpp @@ -46,7 +46,7 @@ define void @foo() { public: TestPass(unsigned &BBCnt) : FunctionPass("test-pass"), BBCnt(BBCnt) {} - bool runOnFunction(Function &F) final { + bool runOnFunction(Function &F, const Analyses &A) final { for ([[maybe_unused]] auto &BB : F) ++BBCnt; return false; @@ -59,7 +59,7 @@ define void @foo() { // Check classof(). 
EXPECT_TRUE(llvm::isa(TPass)); // Check runOnFunction(); - TPass.runOnFunction(*F); + TPass.runOnFunction(*F, Analyses::emptyForTesting()); EXPECT_EQ(BBCnt, 1u); #ifndef NDEBUG { @@ -80,7 +80,7 @@ define void @foo() { class TestNamePass final : public FunctionPass { public: TestNamePass(llvm::StringRef Name) : FunctionPass(Name) {} - bool runOnFunction(Function &F) { return false; } + bool runOnFunction(Function &F, const Analyses &A) { return false; } }; EXPECT_DEATH(TestNamePass("white space"), ".*whitespace.*"); EXPECT_DEATH(TestNamePass("-dash"), ".*start with.*"); @@ -106,7 +106,7 @@ define i8 @foo(i8 %v0, i8 %v1) { public: TestPass(unsigned &InstCount) : RegionPass("test-pass"), InstCount(InstCount) {} - bool runOnRegion(Region &R) final { + bool runOnRegion(Region &R, const Analyses &A) final { for ([[maybe_unused]] auto &Inst : R) { ++InstCount; } @@ -121,7 +121,7 @@ define i8 @foo(i8 %v0, i8 %v1) { llvm::SmallVector> Regions = Region::createRegionsFromMD(*F); ASSERT_EQ(Regions.size(), 1u); - TPass.runOnRegion(*Regions[0]); + TPass.runOnRegion(*Regions[0], Analyses::emptyForTesting()); EXPECT_EQ(InstCount, 2u); #ifndef NDEBUG { @@ -142,7 +142,7 @@ define i8 @foo(i8 %v0, i8 %v1) { class TestNamePass final : public RegionPass { public: TestNamePass(llvm::StringRef Name) : RegionPass(Name) {} - bool runOnRegion(Region &F) { return false; } + bool runOnRegion(Region &F, const Analyses &A) { return false; } }; EXPECT_DEATH(TestNamePass("white space"), ".*whitespace.*"); EXPECT_DEATH(TestNamePass("-dash"), ".*start with.*"); @@ -161,7 +161,7 @@ define void @foo() { public: TestPass1(unsigned &BBCnt) : FunctionPass("test-pass1"), BBCnt(BBCnt) {} - bool runOnFunction(Function &F) final { + bool runOnFunction(Function &F, const Analyses &A) final { for ([[maybe_unused]] auto &BB : F) ++BBCnt; return false; @@ -172,7 +172,7 @@ define void @foo() { public: TestPass2(unsigned &BBCnt) : FunctionPass("test-pass2"), BBCnt(BBCnt) {} - bool runOnFunction(Function &F) final 
{ + bool runOnFunction(Function &F, const Analyses &A) final { for ([[maybe_unused]] auto &BB : F) ++BBCnt; return false; @@ -185,7 +185,7 @@ define void @foo() { FPM.addPass(std::make_unique(BBCnt1)); FPM.addPass(std::make_unique(BBCnt2)); // Check runOnFunction(). - FPM.runOnFunction(*F); + FPM.runOnFunction(*F, Analyses::emptyForTesting()); EXPECT_EQ(BBCnt1, 1u); EXPECT_EQ(BBCnt2, 1u); #ifndef NDEBUG @@ -216,7 +216,7 @@ define i8 @foo(i8 %v0, i8 %v1) { public: TestPass1(unsigned &InstCount) : RegionPass("test-pass1"), InstCount(InstCount) {} - bool runOnRegion(Region &R) final { + bool runOnRegion(Region &R, const Analyses &A) final { for ([[maybe_unused]] auto &Inst : R) ++InstCount; return false; @@ -228,7 +228,7 @@ define i8 @foo(i8 %v0, i8 %v1) { public: TestPass2(unsigned &InstCount) : RegionPass("test-pass2"), InstCount(InstCount) {} - bool runOnRegion(Region &R) final { + bool runOnRegion(Region &R, const Analyses &A) final { for ([[maybe_unused]] auto &Inst : R) ++InstCount; return false; @@ -244,7 +244,7 @@ define i8 @foo(i8 %v0, i8 %v1) { llvm::SmallVector> Regions = Region::createRegionsFromMD(*F); ASSERT_EQ(Regions.size(), 1u); - RPM.runOnRegion(*Regions[0]); + RPM.runOnRegion(*Regions[0], Analyses::emptyForTesting()); EXPECT_EQ(InstCount1, 2u); EXPECT_EQ(InstCount2, 2u); #ifndef NDEBUG @@ -270,7 +270,7 @@ define void @f() { public: FooPass(std::string &Str, llvm::StringRef Args) : FunctionPass("foo-pass"), Str(Str), Args(Args.str()) {} - bool runOnFunction(Function &F) final { + bool runOnFunction(Function &F, const Analyses &A) final { Str += "foo<" + Args + ">"; return false; } @@ -282,7 +282,7 @@ define void @f() { public: BarPass(std::string &Str, llvm::StringRef Args) : FunctionPass("bar-pass"), Str(Str), Args(Args.str()) {} - bool runOnFunction(Function &F) final { + bool runOnFunction(Function &F, const Analyses &A) final { Str += "bar<" + Args + ">"; return false; } @@ -302,7 +302,7 @@ define void @f() { FunctionPassManager FPM("test-fpm"); 
FPM.setPassPipeline("foo,bar>>,foo", CreatePass); - FPM.runOnFunction(*F); + FPM.runOnFunction(*F, Analyses::emptyForTesting()); EXPECT_EQ(Str, "foobar>>foo<>"); // A second call to setPassPipeline will trigger an assertion in debug mode. From 6128ff6630762310f6ae4eb61adda02cb4ad5260 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 22 Oct 2024 17:17:30 -0700 Subject: [PATCH 246/425] [JITLink][MachO] Add convenience functions for default text/data sections. The getMachODefaultTextSection and getMachODefaultRWDataSection functions return the "__TEXT,__text" and "__DATA,__data" sections respectively, creating empty sections if the default sections are not already present in the graph. These functions can be used by utilities that want to add code or data to these standard sections (e.g. these functions can be used to supply the section argument to the createAnonymousPointerJumpStub and createPointerJumpStubBlock functions in the various targets). --- .../llvm/ExecutionEngine/JITLink/MachO.h | 21 ++++ .../Orc/Shared/MachOObjectFormat.h | 1 + .../Orc/Shared/MachOObjectFormat.cpp | 1 + .../ExecutionEngine/JITLink/CMakeLists.txt | 3 +- .../ExecutionEngine/JITLink/JITLinkMocks.cpp | 73 ----------- .../JITLink/JITLinkTestUtils.cpp | 114 ++++++++++++++++++ .../{JITLinkMocks.h => JITLinkTestUtils.h} | 12 +- .../JITLink/LinkGraphTests.cpp | 43 +------ .../JITLink/MachOLinkGraphTests.cpp | 35 ++++++ .../JITLink/MemoryManagerErrorTests.cpp | 2 +- 10 files changed, 184 insertions(+), 121 deletions(-) delete mode 100644 llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp create mode 100644 llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.cpp rename llvm/unittests/ExecutionEngine/JITLink/{JITLinkMocks.h => JITLinkTestUtils.h} (95%) create mode 100644 llvm/unittests/ExecutionEngine/JITLink/MachOLinkGraphTests.cpp diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h index b8432c4d26c68c0..bb8da0ab9db27a5 
100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h @@ -14,6 +14,7 @@ #define LLVM_EXECUTIONENGINE_JITLINK_MACHO_H #include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h" namespace llvm { namespace jitlink { @@ -33,6 +34,26 @@ createLinkGraphFromMachOObject(MemoryBufferRef ObjectBuffer); void link_MachO(std::unique_ptr G, std::unique_ptr Ctx); +/// Get a reference to the standard MachO data section (creates an empty +/// section with RW- permissions and standard lifetime if one does not +/// already exist). +inline Section &getMachODefaultRWDataSection(LinkGraph &G) { + if (auto *DataSec = G.findSectionByName(orc::MachODataDataSectionName)) + return *DataSec; + return G.createSection(orc::MachODataDataSectionName, + orc::MemProt::Read | orc::MemProt::Write); +} + +/// Get a reference to the standard MachO text section (creates an empty +/// section with R-X permissions and standard lifetime if one does not +/// already exist). 
+inline Section &getMachODefaultTextSection(LinkGraph &G) { + if (auto *TextSec = G.findSectionByName(orc::MachOTextTextSectionName)) + return *TextSec; + return G.createSection(orc::MachOTextTextSectionName, + orc::MemProt::Read | orc::MemProt::Exec); +} + } // end namespace jitlink } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h index f886203f8e3fb57..b927dfbce992a0d 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h @@ -49,6 +49,7 @@ extern StringRef MachOSwift5TypesSectionName; extern StringRef MachOSwift5TypeRefSectionName; extern StringRef MachOSwift5FieldMetadataSectionName; extern StringRef MachOSwift5EntrySectionName; +extern StringRef MachOTextTextSectionName; extern StringRef MachOThreadBSSSectionName; extern StringRef MachOThreadDataSectionName; extern StringRef MachOThreadVarsSectionName; diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp index 7f4c2934d026add..11e8eb7bc3a19b1 100644 --- a/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp @@ -42,6 +42,7 @@ StringRef MachOSwift5TypesSectionName = "__TEXT,__swift5_types"; StringRef MachOSwift5TypeRefSectionName = "__TEXT,__swift5_typeref"; StringRef MachOSwift5FieldMetadataSectionName = "__TEXT,__swift5_fieldmd"; StringRef MachOSwift5EntrySectionName = "__TEXT,__swift5_entry"; +StringRef MachOTextTextSectionName = "__TEXT,__text"; StringRef MachOThreadBSSSectionName = "__DATA,__thread_bss"; StringRef MachOThreadDataSectionName = "__DATA,__thread_data"; StringRef MachOThreadVarsSectionName = "__DATA,__thread_vars"; diff --git a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt index 
82d277309307cb9..d1c7b799880a3b7 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt @@ -11,8 +11,9 @@ add_llvm_unittest(JITLinkTests AArch32Tests.cpp AArch32ErrorTests.cpp EHFrameSupportTests.cpp - JITLinkMocks.cpp + JITLinkTestUtils.cpp LinkGraphTests.cpp + MachOLinkGraphTests.cpp MemoryManagerErrorTests.cpp StubsTests.cpp ) diff --git a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp b/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp deleted file mode 100644 index c40ce7adb0b5ea4..000000000000000 --- a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp +++ /dev/null @@ -1,73 +0,0 @@ -//===--------- JITLinkMocks.cpp - Mock APIs for JITLink unit tests --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "JITLinkMocks.h" -#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h" - -#include "llvm/Testing/Support/Error.h" -#include "gtest/gtest.h" - -using namespace llvm; -using namespace llvm::orc; -using namespace llvm::jitlink; - -void lookupResolveEverythingToNull( - const llvm::jitlink::JITLinkContext::LookupMap &Symbols, - std::unique_ptr LC) { - llvm::orc::ExecutorAddr Null; - llvm::jitlink::AsyncLookupResult Result; - for (auto &KV : Symbols) - Result[KV.first] = {Null, llvm::JITSymbolFlags::Exported}; - LC->run(std::move(Result)); -} - -void lookupErrorOut( - const llvm::jitlink::JITLinkContext::LookupMap &Symbols, - std::unique_ptr LC) { - LC->run(llvm::make_error("Lookup failed", - llvm::inconvertibleErrorCode())); -} - -std::unique_ptr makeMockContext( - llvm::unique_function HandleFailed, - llvm::unique_function SetupMemMgr, - llvm::unique_function SetupContext) { - auto MemMgr = std::make_unique(); - 
SetupMemMgr(*MemMgr); - auto Ctx = std::make_unique(std::move(MemMgr), - std::move(HandleFailed)); - SetupContext(*Ctx); - return Ctx; -} - -void defaultMemMgrSetup(MockJITLinkMemoryManager &) {} -void defaultCtxSetup(MockJITLinkContext &) {} - -TEST(JITLinkMocks, SmokeTest) { - // Check that the testing infrastructure defaults can "link" a graph - // successfully. - auto G = std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, - llvm::endianness::little, - getGenericEdgeKindName); - - ArrayRef Content = "hello, world!"; - auto &Sec = - G->createSection("__data", orc::MemProt::Read | orc::MemProt::Write); - orc::ExecutorAddr B1Addr(0x1000); - auto &B = G->createContentBlock(Sec, Content, B1Addr, 8, 0); - G->addDefinedSymbol(B, 4, "S", 4, Linkage::Strong, Scope::Default, false, - false); - - Error Err = Error::success(); - auto Ctx = - makeMockContext(JoinErrorsInto(Err), defaultMemMgrSetup, defaultCtxSetup); - - link_MachO_x86_64(std::move(G), std::move(Ctx)); - - EXPECT_THAT_ERROR(std::move(Err), Succeeded()); -} diff --git a/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.cpp b/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.cpp new file mode 100644 index 000000000000000..9a7878edab5045b --- /dev/null +++ b/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.cpp @@ -0,0 +1,114 @@ +//===------- JITLinkTestUtils.cpp - Utilities for JITLink unit tests ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "JITLinkTestUtils.h" +#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h" + +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::orc; +using namespace llvm::jitlink; + +static const char BlockContentBytes[] = { + 0x54, 0x68, 0x65, 0x72, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20, 0x6d, 0x6f, + 0x76, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x20, 0x61, 0x74, 0x20, 0x74, 0x68, + 0x65, 0x20, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, 0x66, + 0x6f, 0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x6f, 0x72, 0x64, 0x20, + 0x68, 0x61, 0x64, 0x20, 0x70, 0x61, 0x73, 0x73, 0x65, 0x64, 0x20, 0x61, + 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x0a, 0x54, 0x68, 0x61, 0x74, 0x20, 0x74, + 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6c, 0x74, 0x20, 0x66, 0x72, 0x6f, 0x6d, + 0x20, 0x4f, 0x6c, 0x64, 0x20, 0x52, 0x65, 0x67, 0x72, 0x65, 0x74, 0x20, + 0x68, 0x61, 0x64, 0x20, 0x67, 0x6f, 0x74, 0x20, 0x61, 0x77, 0x61, 0x79, + 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x68, 0x61, 0x64, 0x20, 0x6a, 0x6f, + 0x69, 0x6e, 0x65, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x69, 0x6c, + 0x64, 0x20, 0x62, 0x75, 0x73, 0x68, 0x20, 0x68, 0x6f, 0x72, 0x73, 0x65, + 0x73, 0x20, 0x2d, 0x2d, 0x20, 0x68, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20, + 0x77, 0x6f, 0x72, 0x74, 0x68, 0x20, 0x61, 0x20, 0x74, 0x68, 0x6f, 0x75, + 0x73, 0x61, 0x6e, 0x64, 0x20, 0x70, 0x6f, 0x75, 0x6e, 0x64, 0x2c, 0x0a, + 0x53, 0x6f, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, + 0x72, 0x61, 0x63, 0x6b, 0x73, 0x20, 0x68, 0x61, 0x64, 0x20, 0x67, 0x61, + 0x74, 0x68, 0x65, 0x72, 0x65, 0x64, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, + 0x65, 0x20, 0x66, 0x72, 0x61, 0x79, 0x2e, 0x0a, 0x41, 0x6c, 0x6c, 0x20, + 0x74, 0x68, 0x65, 0x20, 0x74, 0x72, 0x69, 0x65, 0x64, 0x20, 0x61, 0x6e, + 0x64, 0x20, 0x6e, 0x6f, 0x74, 0x65, 0x64, 0x20, 0x72, 0x69, 0x64, 0x65, + 0x72, 
0x73, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65, 0x20, + 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x20, 0x6e, 0x65, 0x61, + 0x72, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x66, 0x61, 0x72, 0x0a, 0x48, 0x61, + 0x64, 0x20, 0x6d, 0x75, 0x73, 0x74, 0x65, 0x72, 0x65, 0x64, 0x20, 0x61, + 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x68, 0x6f, 0x6d, 0x65, 0x73, 0x74, + 0x65, 0x61, 0x64, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x6e, 0x69, 0x67, 0x68, + 0x74, 0x2c, 0x0a, 0x46, 0x6f, 0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x62, + 0x75, 0x73, 0x68, 0x6d, 0x65, 0x6e, 0x20, 0x6c, 0x6f, 0x76, 0x65, 0x20, + 0x68, 0x61, 0x72, 0x64, 0x20, 0x72, 0x69, 0x64, 0x69, 0x6e, 0x67, 0x20, + 0x77, 0x68, 0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x69, + 0x6c, 0x64, 0x20, 0x62, 0x75, 0x73, 0x68, 0x20, 0x68, 0x6f, 0x72, 0x73, + 0x65, 0x73, 0x20, 0x61, 0x72, 0x65, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, + 0x74, 0x68, 0x65, 0x20, 0x73, 0x74, 0x6f, 0x63, 0x6b, 0x2d, 0x68, 0x6f, + 0x72, 0x73, 0x65, 0x20, 0x73, 0x6e, 0x75, 0x66, 0x66, 0x73, 0x20, 0x74, + 0x68, 0x65, 0x20, 0x62, 0x61, 0x74, 0x74, 0x6c, 0x65, 0x20, 0x77, 0x69, + 0x74, 0x68, 0x20, 0x64, 0x65, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x00}; + +ArrayRef BlockContent(BlockContentBytes); + +void lookupResolveEverythingToNull( + const llvm::jitlink::JITLinkContext::LookupMap &Symbols, + std::unique_ptr LC) { + llvm::orc::ExecutorAddr Null; + llvm::jitlink::AsyncLookupResult Result; + for (auto &KV : Symbols) + Result[KV.first] = {Null, llvm::JITSymbolFlags::Exported}; + LC->run(std::move(Result)); +} + +void lookupErrorOut( + const llvm::jitlink::JITLinkContext::LookupMap &Symbols, + std::unique_ptr LC) { + LC->run(llvm::make_error("Lookup failed", + llvm::inconvertibleErrorCode())); +} + +std::unique_ptr makeMockContext( + llvm::unique_function HandleFailed, + llvm::unique_function SetupMemMgr, + llvm::unique_function SetupContext) { + auto MemMgr = std::make_unique(); + SetupMemMgr(*MemMgr); + auto Ctx = std::make_unique(std::move(MemMgr), + 
std::move(HandleFailed)); + SetupContext(*Ctx); + return Ctx; +} + +void defaultMemMgrSetup(MockJITLinkMemoryManager &) {} +void defaultCtxSetup(MockJITLinkContext &) {} + +TEST(JITLinkMocks, SmokeTest) { + // Check that the testing infrastructure defaults can "link" a graph + // successfully. + auto G = std::make_unique("foo", Triple("x86_64-apple-darwin"), 8, + llvm::endianness::little, + getGenericEdgeKindName); + + ArrayRef Content = "hello, world!"; + auto &Sec = + G->createSection("__data", orc::MemProt::Read | orc::MemProt::Write); + orc::ExecutorAddr B1Addr(0x1000); + auto &B = G->createContentBlock(Sec, Content, B1Addr, 8, 0); + G->addDefinedSymbol(B, 4, "S", 4, Linkage::Strong, Scope::Default, false, + false); + + Error Err = Error::success(); + auto Ctx = + makeMockContext(JoinErrorsInto(Err), defaultMemMgrSetup, defaultCtxSetup); + + link_MachO_x86_64(std::move(G), std::move(Ctx)); + + EXPECT_THAT_ERROR(std::move(Err), Succeeded()); +} diff --git a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.h b/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.h similarity index 95% rename from llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.h rename to llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.h index 8c1e3ff2c77db58..dc077f900d19585 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.h +++ b/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.h @@ -1,4 +1,4 @@ -//===----- JITLinkMocks.h - Mock APIs for JITLink unit tests ----*- C++ -*-===// +//===--- JITLinkTestUtils.h - Utilities for JITLink unit tests --*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// // -// Mock APIs for JITLink unit tests. +// Utilities for JITLink unit tests. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKMOCKS_H -#define LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKMOCKS_H +#ifndef LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKTESTUTILS_H +#define LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKTESTUTILS_H #include "llvm/ExecutionEngine/JITLink/JITLink.h" @@ -225,4 +225,6 @@ class JoinErrorsInto { llvm::Error &Err; }; -#endif // LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKMOCKS_H +extern llvm::ArrayRef BlockContent; + +#endif // LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKTESTUTILS_H diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index 5eea21184619522..32d917d75d5ca42 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "JITLinkTestUtils.h" + #include "llvm/ADT/STLExtras.h" #include "llvm/ExecutionEngine/JITLink/JITLink.h" #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" @@ -17,47 +19,6 @@ using namespace llvm; using namespace llvm::jitlink; -static const char BlockContentBytes[] = { - 0x54, 0x68, 0x65, 0x72, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20, 0x6d, 0x6f, - 0x76, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x20, 0x61, 0x74, 0x20, 0x74, 0x68, - 0x65, 0x20, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, 0x66, - 0x6f, 0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x6f, 0x72, 0x64, 0x20, - 0x68, 0x61, 0x64, 0x20, 0x70, 0x61, 0x73, 0x73, 0x65, 0x64, 0x20, 0x61, - 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x0a, 0x54, 0x68, 0x61, 0x74, 0x20, 0x74, - 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6c, 0x74, 0x20, 0x66, 0x72, 0x6f, 0x6d, - 0x20, 0x4f, 0x6c, 0x64, 0x20, 0x52, 0x65, 0x67, 0x72, 0x65, 0x74, 0x20, - 0x68, 0x61, 0x64, 0x20, 0x67, 0x6f, 0x74, 0x20, 0x61, 0x77, 0x61, 0x79, - 0x2c, 
0x0a, 0x41, 0x6e, 0x64, 0x20, 0x68, 0x61, 0x64, 0x20, 0x6a, 0x6f, - 0x69, 0x6e, 0x65, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x69, 0x6c, - 0x64, 0x20, 0x62, 0x75, 0x73, 0x68, 0x20, 0x68, 0x6f, 0x72, 0x73, 0x65, - 0x73, 0x20, 0x2d, 0x2d, 0x20, 0x68, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20, - 0x77, 0x6f, 0x72, 0x74, 0x68, 0x20, 0x61, 0x20, 0x74, 0x68, 0x6f, 0x75, - 0x73, 0x61, 0x6e, 0x64, 0x20, 0x70, 0x6f, 0x75, 0x6e, 0x64, 0x2c, 0x0a, - 0x53, 0x6f, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, - 0x72, 0x61, 0x63, 0x6b, 0x73, 0x20, 0x68, 0x61, 0x64, 0x20, 0x67, 0x61, - 0x74, 0x68, 0x65, 0x72, 0x65, 0x64, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, - 0x65, 0x20, 0x66, 0x72, 0x61, 0x79, 0x2e, 0x0a, 0x41, 0x6c, 0x6c, 0x20, - 0x74, 0x68, 0x65, 0x20, 0x74, 0x72, 0x69, 0x65, 0x64, 0x20, 0x61, 0x6e, - 0x64, 0x20, 0x6e, 0x6f, 0x74, 0x65, 0x64, 0x20, 0x72, 0x69, 0x64, 0x65, - 0x72, 0x73, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65, 0x20, - 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x20, 0x6e, 0x65, 0x61, - 0x72, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x66, 0x61, 0x72, 0x0a, 0x48, 0x61, - 0x64, 0x20, 0x6d, 0x75, 0x73, 0x74, 0x65, 0x72, 0x65, 0x64, 0x20, 0x61, - 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x68, 0x6f, 0x6d, 0x65, 0x73, 0x74, - 0x65, 0x61, 0x64, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x6e, 0x69, 0x67, 0x68, - 0x74, 0x2c, 0x0a, 0x46, 0x6f, 0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x62, - 0x75, 0x73, 0x68, 0x6d, 0x65, 0x6e, 0x20, 0x6c, 0x6f, 0x76, 0x65, 0x20, - 0x68, 0x61, 0x72, 0x64, 0x20, 0x72, 0x69, 0x64, 0x69, 0x6e, 0x67, 0x20, - 0x77, 0x68, 0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x69, - 0x6c, 0x64, 0x20, 0x62, 0x75, 0x73, 0x68, 0x20, 0x68, 0x6f, 0x72, 0x73, - 0x65, 0x73, 0x20, 0x61, 0x72, 0x65, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, - 0x74, 0x68, 0x65, 0x20, 0x73, 0x74, 0x6f, 0x63, 0x6b, 0x2d, 0x68, 0x6f, - 0x72, 0x73, 0x65, 0x20, 0x73, 0x6e, 0x75, 0x66, 0x66, 0x73, 0x20, 0x74, - 0x68, 0x65, 0x20, 0x62, 0x61, 0x74, 0x74, 0x6c, 0x65, 0x20, 0x77, 0x69, - 0x74, 
0x68, 0x20, 0x64, 0x65, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x00}; - -static ArrayRef BlockContent(BlockContentBytes); - TEST(LinkGraphTest, Construction) { // Check that LinkGraph construction works as expected. LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little, diff --git a/llvm/unittests/ExecutionEngine/JITLink/MachOLinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/MachOLinkGraphTests.cpp new file mode 100644 index 000000000000000..be922275be26f49 --- /dev/null +++ b/llvm/unittests/ExecutionEngine/JITLink/MachOLinkGraphTests.cpp @@ -0,0 +1,35 @@ +//===---- MachOLinkGraphTests.cpp - Unit tests for MachO LinkGraph APIs ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "JITLinkTestUtils.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/MachO.h" + +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::jitlink; + +TEST(MachOLinkGraphTest, GetStandardSections) { + // Check that the default MachO data/text sections are created on demand. 
+ LinkGraph G("foo", Triple("arm64-apple-darwin"), 8, llvm::endianness::little, + getGenericEdgeKindName); + + auto &Data = getMachODefaultRWDataSection(G); + EXPECT_TRUE(Data.empty()); + EXPECT_EQ(Data.getName(), orc::MachODataDataSectionName); + EXPECT_EQ(Data.getMemProt(), orc::MemProt::Read | orc::MemProt::Write); + + auto &Text = getMachODefaultTextSection(G); + EXPECT_TRUE(Text.empty()); + EXPECT_EQ(Text.getName(), orc::MachOTextTextSectionName); + EXPECT_EQ(Text.getMemProt(), orc::MemProt::Read | orc::MemProt::Exec); +} diff --git a/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp index f0f3dd117c6f883..2b303f7a8c1a298 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "JITLinkMocks.h" +#include "JITLinkTestUtils.h" #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h" #include "llvm/Testing/Support/Error.h" From 0c1c37bfbed08c9d4e414a10f46cbed9a3e4c870 Mon Sep 17 00:00:00 2001 From: c8ef Date: Tue, 29 Oct 2024 10:08:38 +0800 Subject: [PATCH 247/425] [TLI] Add support for the `tgamma` libcall. (#113791) This patch adds the `tgamma` libcall. 
--- .../llvm/Analysis/TargetLibraryInfo.def | 15 ++++++++++++++ llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 3 +++ .../Transforms/InferFunctionAttrs/annotate.ll | 9 +++++++++ .../tools/llvm-tli-checker/ps4-tli-check.yaml | 20 +++++++++++++++---- .../Analysis/TargetLibraryInfoTest.cpp | 3 +++ 5 files changed, 46 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index f890e2b9ec4c82a..3e23e398f6a7976 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1140,6 +1140,21 @@ TLI_DEFINE_ENUM_INTERNAL(erfl) TLI_DEFINE_STRING_INTERNAL("erfl") TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl) +/// double tgamma(double x); +TLI_DEFINE_ENUM_INTERNAL(tgamma) +TLI_DEFINE_STRING_INTERNAL("tgamma") +TLI_DEFINE_SIG_INTERNAL(Dbl, Dbl) + +/// float tgammaf(float x); +TLI_DEFINE_ENUM_INTERNAL(tgammaf) +TLI_DEFINE_STRING_INTERNAL("tgammaf") +TLI_DEFINE_SIG_INTERNAL(Flt, Flt) + +/// long double tgammal(long double x); +TLI_DEFINE_ENUM_INTERNAL(tgammal) +TLI_DEFINE_STRING_INTERNAL("tgammal") +TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl) + /// int execl(const char *path, const char *arg, ...); TLI_DEFINE_ENUM_INTERNAL(execl) TLI_DEFINE_STRING_INTERNAL("execl") diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 13323604eb514a8..5fd4fd78c28a953 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1179,6 +1179,9 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F, case LibFunc_erf: case LibFunc_erff: case LibFunc_erfl: + case LibFunc_tgamma: + case LibFunc_tgammaf: + case LibFunc_tgammal: case LibFunc_exp: case LibFunc_expf: case LibFunc_expl: diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll index 3e9b2d94efda89d..d8266f4c6703dd6 100644 --- 
a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll @@ -667,6 +667,15 @@ declare float @logf(float) ; CHECK: declare x86_fp80 @logl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] declare x86_fp80 @logl(x86_fp80) +; CHECK: declare double @tgamma(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare double @tgamma(double) + +; CHECK: declare float @tgammaf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare float @tgammaf(float) + +; CHECK: declare x86_fp80 @tgammal(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]] +declare x86_fp80 @tgammal(x86_fp80) + ; CHECK: declare noundef i32 @lstat(ptr nocapture noundef readonly, ptr nocapture noundef) [[NOFREE_NOUNWIND]] declare i32 @lstat(ptr, ptr) diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index 20e7e15e3efb551..408b9c39934286f 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -34,7 +34,7 @@ # # CHECK: << Total TLI yes SDK no: 18 # CHECK: >> Total TLI no SDK yes: 0 -# CHECK: == Total TLI yes SDK yes: 265 +# CHECK: == Total TLI yes SDK yes: 268 # # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*) # WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int) @@ -48,14 +48,14 @@ # WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl' # WRONG_SUMMARY: << Total TLI yes SDK no: 19{{$}} # WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}} -# WRONG_SUMMARY: == Total TLI yes SDK yes: 264 +# WRONG_SUMMARY: == Total TLI yes SDK yes: 267 # ## The -COUNT suffix doesn't care if there are too many matches, so check ## the exact count first; the two directives should add up to that. ## Yes, this means additions to TLI will fail this test, but the argument ## to -COUNT can't be an expression. 
-# AVAIL: TLI knows 516 symbols, 283 available -# AVAIL-COUNT-283: {{^}} available +# AVAIL: TLI knows 519 symbols, 286 available +# AVAIL-COUNT-286: {{^}} available # AVAIL-NOT: {{^}} available # UNAVAIL-COUNT-233: not available # UNAVAIL-NOT: not available @@ -390,6 +390,18 @@ DynamicSymbols: Type: STT_FUNC Section: .text Binding: STB_GLOBAL + - Name: tgamma + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: tgammaf + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + - Name: tgammal + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL - Name: exp Type: STT_FUNC Section: .text diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index 346940384aff910..98f8989d4e6e9e9 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -303,6 +303,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare double @erf(double)\n" "declare float @erff(float)\n" "declare x86_fp80 @erfl(x86_fp80)\n" + "declare double @tgamma(double)\n" + "declare float @tgammaf(float)\n" + "declare x86_fp80 @tgammal(x86_fp80)\n" "declare i32 @printf(i8*, ...)\n" "declare i32 @putc(i32, %struct*)\n" "declare i32 @putc_unlocked(i32, %struct*)\n" From 18311093abe6481388a0d963d58438d743b47569 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 29 Oct 2024 10:39:07 +0800 Subject: [PATCH 248/425] [InstCombine] Do not fold `shufflevector(select)` if the select condition is a vector (#113993) Since `shufflevector` is not element-wise, we cannot do fold it into select when the select condition is a vector. For shufflevector that doesn't change the length, it doesn't crash, but it is still a miscompilation: https://alive2.llvm.org/ce/z/s8saCx Fixes https://github.com/llvm/llvm-project/issues/113986. 
--- .../InstCombine/InstCombineVectorOps.cpp | 8 +++++-- .../Transforms/InstCombine/vec_shuffle.ll | 24 +++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 75e7c1c97018cb1..454fe5a91d375a3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -2902,8 +2902,12 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (match(RHS, m_Constant())) { if (auto *SI = dyn_cast(LHS)) { - if (Instruction *I = FoldOpIntoSelect(SVI, SI)) - return I; + // We cannot do this fold for elementwise select since ShuffleVector is + // not elementwise. + if (SI->getCondition()->getType()->isIntegerTy()) { + if (Instruction *I = FoldOpIntoSelect(SVI, SI)) + return I; + } } if (auto *PN = dyn_cast(LHS)) { if (Instruction *I = foldOpIntoPhi(SVI, PN)) diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index d050cf10849e3cc..39a9db02eef2930 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -2387,6 +2387,30 @@ define <2 x i32> @foldselect0(i1 %c) { ret <2 x i32> %shuf } +; Make sure we do not crash in this case. 
+define <4 x float> @shuf_larger_length_vec_select(<2 x i1> %cond) { +; CHECK-LABEL: @shuf_larger_length_vec_select( +; CHECK-NEXT: [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> zeroinitializer, <2 x float> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[SEL]], <2 x float> zeroinitializer, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[SHUF]] +; + %sel = select <2 x i1> %cond, <2 x float> zeroinitializer, <2 x float> splat(float 1.000000e+00) + %shuf = shufflevector <2 x float> %sel, <2 x float> zeroinitializer, <4 x i32> + ret <4 x float> %shuf +} + +; Make sure we do not fold in this case. +define <4 x i32> @shuf_same_length_vec_select(<4 x i1> %cond) { +; CHECK-LABEL: @shuf_same_length_vec_select( +; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[SEL]], <4 x i32> , <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; + %sel = select <4 x i1> %cond, <4 x i32> , <4 x i32> + %shuf = shufflevector <4 x i32> %sel, <4 x i32> , <4 x i32> + ret <4 x i32> %shuf +} + declare i1 @cond() declare <4 x i32> @value() From 635c344dfb3227f80c76dfbee9d6bf44ef742675 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Oct 2024 19:59:00 -0700 Subject: [PATCH 249/425] [X86] Add vector_compress patterns with a zero vector passthru. (#113970) We can use the kz form to automatically zero the extra elements. Fixes #113263. 
--- llvm/lib/Target/X86/X86InstrAVX512.td | 3 ++ llvm/test/CodeGen/X86/vector-compress.ll | 56 ++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 98c31867e6b22b0..32c4ebc331f1d72 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -10549,6 +10549,9 @@ multiclass compress_by_vec_width_lowering { def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, undef)), (!cast(Name#_.ZSuffix#rrkz) _.KRCWM:$mask, _.RC:$src)>; + def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.ImmAllZerosV)), + (!cast(Name#_.ZSuffix#rrkz) + _.KRCWM:$mask, _.RC:$src)>; def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.RC:$passthru)), (!cast(Name#_.ZSuffix#rrk) _.RC:$passthru, _.KRCWM:$mask, _.RC:$src)>; diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll index 2b963ab896cc9e1..f8c076db65de949 100644 --- a/llvm/test/CodeGen/X86/vector-compress.ll +++ b/llvm/test/CodeGen/X86/vector-compress.ll @@ -1211,3 +1211,59 @@ define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i %out = call <3 x i3> @llvm.experimental.vector.compress(<3 x i3> %vec, <3 x i1> %mask, <3 x i3> undef) ret <3 x i3> %out } + +define <4 x i32> @test_compress_v4i32_zero_passthru(<4 x i32> %vec, <4 x i1> %mask) { +; AVX2-LABEL: test_compress_v4i32_zero_passthru: +; AVX2: # %bb.0: +; AVX2-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1 +; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vmovaps %xmm2, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) +; AVX2-NEXT: vmovd %xmm1, %eax +; AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: vextractps $1, %xmm0, -24(%rsp,%rax,4) +; AVX2-NEXT: vpextrd $1, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: addq %rax, %rcx +; AVX2-NEXT: vextractps $2, %xmm0, -24(%rsp,%rcx,4) +; AVX2-NEXT: vpextrd $2, %xmm1, %eax +; 
AVX2-NEXT: andl $1, %eax +; AVX2-NEXT: addq %rcx, %rax +; AVX2-NEXT: vpextrd $3, %xmm1, %ecx +; AVX2-NEXT: andl $1, %ecx +; AVX2-NEXT: addq %rax, %rcx +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax def $rax +; AVX2-NEXT: andl $3, %eax +; AVX2-NEXT: vextractps $3, %xmm0, -24(%rsp,%rax,4) +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: cmpq $3, %rcx +; AVX2-NEXT: movl $3, %edx +; AVX2-NEXT: cmovbq %rcx, %rdx +; AVX2-NEXT: vextractps $3, %xmm0, %ecx +; AVX2-NEXT: cmovbel %eax, %ecx +; AVX2-NEXT: movl %ecx, -24(%rsp,%rdx,4) +; AVX2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: test_compress_v4i32_zero_passthru: +; AVX512F: # %bb.0: +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512F-NEXT: kshiftlw $12, %k0, %k0 +; AVX512F-NEXT: kshiftrw $12, %k0, %k1 +; AVX512F-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: test_compress_v4i32_zero_passthru: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpslld $31, %xmm1, %xmm1 +; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1 +; AVX512VL-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} +; AVX512VL-NEXT: retq + %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> zeroinitializer) + ret <4 x i32> %out +} From 00ca2071e08f3a82171e564618981906a15e8dca Mon Sep 17 00:00:00 2001 From: Jerry Sun <105613447+jerryyiransun@users.noreply.github.com> Date: Mon, 28 Oct 2024 23:26:10 -0400 Subject: [PATCH 250/425] [TableGen] [NFC] Remove unused includes in TableGen BE (#113725) split PR as requested from https://github.com/llvm/llvm-project/pull/113318. 
Removes unused imports in TableGen BE --- llvm/utils/TableGen/Attributes.cpp | 1 - llvm/utils/TableGen/CTagsEmitter.cpp | 2 +- llvm/utils/TableGen/DXILEmitter.cpp | 1 - llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp | 2 -- llvm/utils/TableGen/GlobalISelEmitter.cpp | 1 - llvm/utils/TableGen/IntrinsicEmitter.cpp | 1 - llvm/utils/TableGen/OptionParserEmitter.cpp | 1 - llvm/utils/TableGen/PseudoLoweringEmitter.cpp | 1 - llvm/utils/TableGen/VTEmitter.cpp | 1 - 9 files changed, 1 insertion(+), 10 deletions(-) diff --git a/llvm/utils/TableGen/Attributes.cpp b/llvm/utils/TableGen/Attributes.cpp index 138275356dc9546..66ba25c6dcc87d4 100644 --- a/llvm/utils/TableGen/Attributes.cpp +++ b/llvm/utils/TableGen/Attributes.cpp @@ -9,7 +9,6 @@ #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include using namespace llvm; #define DEBUG_TYPE "attr-enum" diff --git a/llvm/utils/TableGen/CTagsEmitter.cpp b/llvm/utils/TableGen/CTagsEmitter.cpp index 3718486ff7ad4e6..413d8f5dbcff074 100644 --- a/llvm/utils/TableGen/CTagsEmitter.cpp +++ b/llvm/utils/TableGen/CTagsEmitter.cpp @@ -17,7 +17,7 @@ #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include +#include #include using namespace llvm; diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 0598baea9be7a23..467a6163ae3b0c1 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -14,7 +14,6 @@ #include "Basic/SequenceToOffsetTable.h" #include "Common/CodeGenTarget.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp index 424f1ccb067f905..149ba7a1d9032d3 100644 --- 
a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp @@ -40,9 +40,7 @@ #include "Common/SubtargetFeatureInfo.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index e866bd983e04ea6..859310906af4686 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -38,7 +38,6 @@ #include "Common/GlobalISel/GlobalISelMatchTable.h" #include "Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h" #include "Common/InfoByHwMode.h" -#include "Common/SubtargetFeatureInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/CodeGenTypes/MachineValueType.h" diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index 1968e7eac21e337..070d7522a97be9f 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp index 2872762cc7fd96e..cd7a140bb23143b 100644 --- a/llvm/utils/TableGen/OptionParserEmitter.cpp +++ b/llvm/utils/TableGen/OptionParserEmitter.cpp @@ -15,7 +15,6 @@ #include "llvm/TableGen/TableGenBackend.h" #include #include -#include using namespace llvm; diff --git a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp index d2d2bd91445a141..bcbc6ea20751fe7 100644 --- a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp +++ b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp @@ -17,7 +17,6 @@ #include "llvm/TableGen/Record.h" 
#include "llvm/TableGen/TGTimer.h" #include "llvm/TableGen/TableGenBackend.h" -#include using namespace llvm; #define DEBUG_TYPE "pseudo-lowering" diff --git a/llvm/utils/TableGen/VTEmitter.cpp b/llvm/utils/TableGen/VTEmitter.cpp index 8f4bcd5fccc73d0..4cbc7abd699d204 100644 --- a/llvm/utils/TableGen/VTEmitter.cpp +++ b/llvm/utils/TableGen/VTEmitter.cpp @@ -10,7 +10,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" -#include #include #include using namespace llvm; From 6588073724d3241d90663e45154d806a28bce95a Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Tue, 29 Oct 2024 13:19:43 +0900 Subject: [PATCH 251/425] [mlir][func] Fix incorrect API usage in `FuncOpConversion` (#113977) This commit fixes a case of incorrect dialect conversion API usage during `FuncOpConversion`. `replaceAllUsesExcept` (same as `replaceAllUsesWith`) is currently not supported in a dialect conversion. `replaceUsesOfBlockArgument` should be used instead. It sometimes works anyway (like in this case), but that's just because of the way we insert materializations. This commit is in preparation of merging the 1:1 and 1:N dialect conversion drivers. (At that point, the current use of `replaceAllUsesExcept` will no longer work.) 
--- mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp index 27c43e0daad0728..c046ea1b824fc85 100644 --- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp +++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp @@ -273,7 +273,7 @@ static void wrapExternalFunction(OpBuilder &builder, Location loc, static void restoreByValRefArgumentType( ConversionPatternRewriter &rewriter, const LLVMTypeConverter &typeConverter, ArrayRef> byValRefNonPtrAttrs, - LLVM::LLVMFuncOp funcOp) { + ArrayRef oldBlockArgs, LLVM::LLVMFuncOp funcOp) { // Nothing to do for function declarations. if (funcOp.isExternal()) return; @@ -281,8 +281,8 @@ static void restoreByValRefArgumentType( ConversionPatternRewriter::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(&funcOp.getFunctionBody().front()); - for (const auto &[arg, byValRefAttr] : - llvm::zip(funcOp.getArguments(), byValRefNonPtrAttrs)) { + for (const auto &[arg, oldArg, byValRefAttr] : + llvm::zip(funcOp.getArguments(), oldBlockArgs, byValRefNonPtrAttrs)) { // Skip argument if no `llvm.byval` or `llvm.byref` attribute. if (!byValRefAttr) continue; @@ -295,7 +295,7 @@ static void restoreByValRefArgumentType( cast(byValRefAttr->getValue()).getValue()); auto valueArg = rewriter.create(arg.getLoc(), resTy, arg); - rewriter.replaceAllUsesExcept(arg, valueArg, valueArg); + rewriter.replaceUsesOfBlockArgument(oldArg, valueArg); } } @@ -309,6 +309,10 @@ mlir::convertFuncOpToLLVMFuncOp(FunctionOpInterface funcOp, return rewriter.notifyMatchFailure( funcOp, "Only support FunctionOpInterface with FunctionType"); + // Keep track of the entry block arguments. They will be needed later. + SmallVector oldBlockArgs = + llvm::to_vector(funcOp.getArguments()); + // Convert the original function arguments. 
They are converted using the // LLVMTypeConverter provided to this legalization pattern. auto varargsAttr = funcOp->getAttrOfType(varargsAttrName); @@ -438,7 +442,7 @@ mlir::convertFuncOpToLLVMFuncOp(FunctionOpInterface funcOp, // pointee type in the function body when converting `llvm.byval`/`llvm.byref` // function arguments. restoreByValRefArgumentType(rewriter, converter, byValRefNonPtrAttrs, - newFuncOp); + oldBlockArgs, newFuncOp); if (!shouldUseBarePtrCallConv(funcOp, &converter)) { if (funcOp->getAttrOfType( From 828467a54e156cbb04820ae47df32c45fbc75fc0 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 29 Oct 2024 14:15:43 +0900 Subject: [PATCH 252/425] Fix warnings introduced in #111434 [-Wnontrivial-memaccess] --- lld/ELF/Symbols.h | 2 +- llvm/include/llvm/Support/Endian.h | 2 +- llvm/lib/ExecutionEngine/ExecutionEngine.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index 339f32e05f16254..8c9c9a56cfbc720 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -75,7 +75,7 @@ class Symbol { // The default copy constructor is deleted due to atomic flags. Define one for // places where no atomic is needed. 
- Symbol(const Symbol &o) { memcpy(this, &o, sizeof(o)); } + Symbol(const Symbol &o) { memcpy(static_cast(this), &o, sizeof(o)); } protected: const char *nameData; diff --git a/llvm/include/llvm/Support/Endian.h b/llvm/include/llvm/Support/Endian.h index 5831fe66a1f7b71..f86ea8901ae46b8 100644 --- a/llvm/include/llvm/Support/Endian.h +++ b/llvm/include/llvm/Support/Endian.h @@ -58,7 +58,7 @@ template [[nodiscard]] inline value_type read(const void *memory, endianness endian) { value_type ret; - memcpy(&ret, + memcpy(static_cast(&ret), LLVM_ASSUME_ALIGNED( memory, (detail::PickAlignment::value)), sizeof(value_type)); diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp index f09975331bba84c..42622ea12152ab7 100644 --- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp @@ -1056,7 +1056,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, *((double*)Ptr) = Val.DoubleVal; break; case Type::X86_FP80TyID: - memcpy(Ptr, Val.IntVal.getRawData(), 10); + memcpy(static_cast(Ptr), Val.IntVal.getRawData(), 10); break; case Type::PointerTyID: // Ensure 64 bit target pointers are fully initialized on 32 bit hosts. From d3b98559be72682da45df73522173cb315912f6f Mon Sep 17 00:00:00 2001 From: Peng Liu Date: Tue, 29 Oct 2024 01:29:37 -0400 Subject: [PATCH 253/425] Add exception guard for constructor vector(n, x, a) (#113086) Added exception guard to the `vector(n, x, a)` constructor to enhance exception safety. This change ensures that the `vector(n, x, a)` constructor is consistent with other constructors, such as `vector(n)`, `vector(n, x)`, `vector(n, a)`, in terms of exception safety. 
--- libcxx/include/__vector/vector.h | 2 ++ .../sequences/vector/vector.cons/exceptions.pass.cpp | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h index 7889e8c2201ac19..844e5d6a2105687 100644 --- a/libcxx/include/__vector/vector.h +++ b/libcxx/include/__vector/vector.h @@ -165,10 +165,12 @@ class _LIBCPP_TEMPLATE_VIS vector { _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI vector(size_type __n, const value_type& __x, const allocator_type& __a) : __alloc_(__a) { + auto __guard = std::__make_exception_guard(__destroy_vector(*this)); if (__n > 0) { __vallocate(__n); __construct_at_end(__n, __x); } + __guard.__complete(); } template = 14 + try { // Throw in vector(size_type, value_type, const allocator_type&) from the type + int throw_after = 1; + ThrowingT v(throw_after); + std::vector vec(1, v, std::allocator()); + } catch (int) { + } + check_new_delete_called(); + try { // Throw in vector(InputIterator, InputIterator) from input iterator std::vector vec((Iterator()), Iterator(2)); } catch (int) { From 3f4468faaa9525ad615118675c3c68938f4a8d5f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 28 Oct 2024 22:37:47 -0700 Subject: [PATCH 254/425] [RISCV] Teach expandRV32ZdinxStore to handle memoperand not being present. (#113981) I received a report that the outliner drops memoperands and causes this code to crash. Handle this by only copying the memoperand if it exists. 
Similar for expandRV32ZdinxLoad --- .../Target/RISCV/RISCVExpandPseudoInsts.cpp | 89 ++++++++++--------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 5dcec078856eadd..eb3e1a1fe9fd5e7 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -320,34 +320,37 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd); - assert(MBBI->hasOneMemOperand() && "Expected mem operand"); - MachineMemOperand *OldMMO = MBBI->memoperands().front(); - MachineFunction *MF = MBB.getParent(); - MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4); - MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4); - - BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) - .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill())) - .addReg(MBBI->getOperand(1).getReg()) - .add(MBBI->getOperand(2)) - .setMemRefs(MMOLo); + auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) + .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill())) + .addReg(MBBI->getOperand(1).getReg()) + .add(MBBI->getOperand(2)); + MachineInstrBuilder MIBHi; if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) { assert(MBBI->getOperand(2).getOffset() % 8 == 0); MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4); - BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) - .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill())) - .add(MBBI->getOperand(1)) - .add(MBBI->getOperand(2)) - .setMemRefs(MMOHi); + MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) + .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill())) + .add(MBBI->getOperand(1)) + .add(MBBI->getOperand(2)); } else { assert(isInt<12>(MBBI->getOperand(2).getImm() + 4)); - BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) - .addReg(Hi, 
getKillRegState(MBBI->getOperand(0).isKill())) - .add(MBBI->getOperand(1)) - .addImm(MBBI->getOperand(2).getImm() + 4) - .setMemRefs(MMOHi); + MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) + .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill())) + .add(MBBI->getOperand(1)) + .addImm(MBBI->getOperand(2).getImm() + 4); + } + + if (!MBBI->memoperands_empty()) { + assert(MBBI->hasOneMemOperand() && "Expected mem operand"); + MachineMemOperand *OldMMO = MBBI->memoperands().front(); + MachineFunction *MF = MBB.getParent(); + MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4); + MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4); + MIBLo.setMemRefs(MMOLo); + MIBHi.setMemRefs(MMOHi); } + MBBI->eraseFromParent(); return true; } @@ -364,46 +367,48 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB, Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd); - assert(MBBI->hasOneMemOperand() && "Expected mem operand"); - MachineMemOperand *OldMMO = MBBI->memoperands().front(); - MachineFunction *MF = MBB.getParent(); - MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4); - MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4); + MachineInstrBuilder MIBLo, MIBHi; // If the register of operand 1 is equal to the Lo register, then swap the // order of loading the Lo and Hi statements. 
bool IsOp1EqualToLo = Lo == MBBI->getOperand(1).getReg(); // Order: Lo, Hi if (!IsOp1EqualToLo) { - BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo) - .addReg(MBBI->getOperand(1).getReg()) - .add(MBBI->getOperand(2)) - .setMemRefs(MMOLo); + MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo) + .addReg(MBBI->getOperand(1).getReg()) + .add(MBBI->getOperand(2)); } if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) { auto Offset = MBBI->getOperand(2).getOffset(); assert(Offset % 8 == 0); MBBI->getOperand(2).setOffset(Offset + 4); - BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi) - .addReg(MBBI->getOperand(1).getReg()) - .add(MBBI->getOperand(2)) - .setMemRefs(MMOHi); + MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi) + .addReg(MBBI->getOperand(1).getReg()) + .add(MBBI->getOperand(2)); MBBI->getOperand(2).setOffset(Offset); } else { assert(isInt<12>(MBBI->getOperand(2).getImm() + 4)); - BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi) - .addReg(MBBI->getOperand(1).getReg()) - .addImm(MBBI->getOperand(2).getImm() + 4) - .setMemRefs(MMOHi); + MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi) + .addReg(MBBI->getOperand(1).getReg()) + .addImm(MBBI->getOperand(2).getImm() + 4); } // Order: Hi, Lo if (IsOp1EqualToLo) { - BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo) - .addReg(MBBI->getOperand(1).getReg()) - .add(MBBI->getOperand(2)) - .setMemRefs(MMOLo); + MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo) + .addReg(MBBI->getOperand(1).getReg()) + .add(MBBI->getOperand(2)); + } + + if (!MBBI->memoperands_empty()) { + assert(MBBI->hasOneMemOperand() && "Expected mem operand"); + MachineMemOperand *OldMMO = MBBI->memoperands().front(); + MachineFunction *MF = MBB.getParent(); + MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4); + MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4); + MIBLo.setMemRefs(MMOLo); + MIBHi.setMemRefs(MMOHi); } MBBI->eraseFromParent(); From 48adfaf3b290d97260eabb53254de9ada313cd0e Mon 
Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 29 Oct 2024 08:05:33 +0100 Subject: [PATCH 255/425] doc: remove trailing whitespaces --- clang/docs/FunctionEffectAnalysis.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/docs/FunctionEffectAnalysis.rst b/clang/docs/FunctionEffectAnalysis.rst index f286aec4f5b98fd..3f2c4db7bad0cb7 100644 --- a/clang/docs/FunctionEffectAnalysis.rst +++ b/clang/docs/FunctionEffectAnalysis.rst @@ -54,9 +54,9 @@ attached to functions, blocks, function pointers, lambdas, and member functions. The attribute applies only to the function itself. In particular, it does not apply to any nested functions or declarations, such as blocks, lambdas, and local classes. -This document uses the C++/C23 syntax ``[[clang::nonblocking]]``, since it parallels the placement +This document uses the C++/C23 syntax ``[[clang::nonblocking]]``, since it parallels the placement of the ``noexcept`` specifier, and the attributes have other similarities to ``noexcept``. The GNU -``__attribute__((nonblocking))`` syntax is also supported. Note that it requires a different +``__attribute__((nonblocking))`` syntax is also supported. Note that it requires a different placement on a C++ type alias. Like ``noexcept``, ``nonblocking`` and ``nonallocating`` have an optional argument, a compile-time @@ -76,10 +76,10 @@ series of performance constraints. From weakest to strongest: - ``nonblocking``: The function type will never block on a lock, allocate memory on the heap, or throw an exception. -``nonblocking`` includes the ``nonallocating`` guarantee. +``nonblocking`` includes the ``nonallocating`` guarantee. While ``nonblocking`` and ``nonallocating`` are conceptually a superset of ``noexcept``, neither -attribute implicitly specifies ``noexcept``. Further, ``noexcept`` has a specified runtime behavior of +attribute implicitly specifies ``noexcept``. 
Further, ``noexcept`` has a specified runtime behavior of aborting if an exception is thrown, while the ``nonallocating`` and ``nonblocking`` attributes are mainly for compile-time analysis and have no runtime behavior, except in code built with Clang's :doc:`RealtimeSanitizer`. Nonetheless, Clang emits a @@ -95,7 +95,7 @@ function, as described in the section "Analysis and warnings", below. explicitly disable any potential inference of ``nonblocking`` or ``nonallocating`` during verification. (Inference is described later in this document). ``nonblocking(false)`` and ``nonallocating(false)`` are legal, but superfluous when applied to a function *type* -that is not part of a declarator: ``float (int) [[nonblocking(false)]]`` and +that is not part of a declarator: ``float (int) [[nonblocking(false)]]`` and ``float (int)`` are identical types. For functions with no explicit performance constraint, the worst is assumed: the function @@ -153,7 +153,7 @@ are comparable to that for ``noexcept`` in C++17 and later. void (*fp_nonallocating)() [[clang::nonallocating]]; fp_nonallocating = nullptr; fp_nonallocating = nonallocating; - fp_nonallocating = nonblocking; // no warning because nonblocking includes nonallocating + fp_nonallocating = nonblocking; // no warning because nonblocking includes nonallocating fp_nonallocating = unannotated; // ^ warning: attribute 'nonallocating' should not be added via type conversion } @@ -274,7 +274,7 @@ following rules. Such functions: from the analysis. (The reason for requiring ``noexcept`` in C++ is that a function declared ``noreturn`` could be a wrapper for ``throw``.) -5. May not invoke or access an Objective-C method or property, since ``objc_msgSend()`` calls into +5. May not invoke or access an Objective-C method or property, since ``objc_msgSend()`` calls into the Objective-C runtime, which may allocate memory or otherwise block. 6. May not access thread-local variables. 
Typically, thread-local variables are allocated on the From 59085e9c3f522b59e512f1651058a9f98c739088 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 29 Oct 2024 16:16:26 +0900 Subject: [PATCH 256/425] Fix more lld warnings introduced in #111434 [-Wnontrivial-memaccess] --- lld/ELF/Arch/ARM.cpp | 2 +- lld/ELF/SymbolTable.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp index be3f80337aae71c..271e2e27a269494 100644 --- a/lld/ELF/Arch/ARM.cpp +++ b/lld/ELF/Arch/ARM.cpp @@ -1207,7 +1207,7 @@ template void ObjFile::importCmseSymbols() { Defined *sym = reinterpret_cast(make()); // Initialize symbol fields. - memset(sym, 0, sizeof(Symbol)); + memset(static_cast(sym), 0, sizeof(Symbol)); sym->setName(CHECK(eSyms[i].getName(stringTable), this)); sym->value = eSym.st_value; sym->size = eSym.st_size; diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 674b1ef983f8430..8878acdc43e87ff 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -56,7 +56,7 @@ void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { // alias for sym, but that could degrade the user experience of some tools // that can print out only one symbol for each location: sym is a preferred // name than real, but they might print out real instead. - memcpy(real, sym, sizeof(SymbolUnion)); + memcpy(static_cast(real), sym, sizeof(SymbolUnion)); real->isUsedInRegularObj = false; } @@ -87,7 +87,7 @@ Symbol *SymbolTable::insert(StringRef name) { symVector.push_back(sym); // *sym was not initialized by a constructor. Initialize all Symbol fields. 
- memset(sym, 0, sizeof(Symbol)); + memset(static_cast(sym), 0, sizeof(Symbol)); sym->setName(name); sym->partition = 1; sym->versionId = VER_NDX_GLOBAL; From 66fc81cb430c724dc8b4b8d2d029101cc1d6808a Mon Sep 17 00:00:00 2001 From: Takuto Ikuta Date: Tue, 29 Oct 2024 16:30:06 +0900 Subject: [PATCH 257/425] fix missing include for `abort` in `FuzzedDataProvider.h` (#113872) This is to fix build with newer libc++ in chromium. ref: https://crbug.com/375980422 From 335e68d8bce5ad3f5d6471c0ec1423211c71c0f0 Mon Sep 17 00:00:00 2001 From: Jesse Huang Date: Tue, 29 Oct 2024 00:47:49 -0700 Subject: [PATCH 258/425] [Clang][RISCV] Support -fcf-protection=return for RISC-V (#112477) Enables the support of `-fcf-protection=return` on RISC-V, which requires Zicfiss. It also adds a string attribute "hw-shadow-stack" to every function if the option is set on RISC-V --- clang/lib/Basic/Targets/RISCV.h | 7 +++++++ clang/lib/CodeGen/Targets/RISCV.cpp | 7 +++++-- clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c | 9 +++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index bf40edb8683b3e7..3b418585ab4a399 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -141,6 +141,13 @@ class RISCVTargetInfo : public TargetInfo { return true; } + bool + checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const override { + if (ISAInfo->hasExtension("zicfiss")) + return true; + return TargetInfo::checkCFProtectionReturnSupported(Diags); + } + CFBranchLabelSchemeKind getDefaultCFBranchLabelScheme() const override { return CFBranchLabelSchemeKind::FuncSig; } diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index fd72fe673b9b14d..b04e436c665f523 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -594,6 +594,11 @@ class 
RISCVTargetCodeGenInfo : public TargetCodeGenInfo { const auto *FD = dyn_cast_or_null(D); if (!FD) return; + auto *Fn = cast(GV); + + if (CGM.getCodeGenOpts().CFProtectionReturn) + Fn->addFnAttr("hw-shadow-stack"); + const auto *Attr = FD->getAttr(); if (!Attr) return; @@ -604,8 +609,6 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo { case RISCVInterruptAttr::machine: Kind = "machine"; break; } - auto *Fn = cast(GV); - Fn->addFnAttr("interrupt", Kind); } }; diff --git a/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c b/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c new file mode 100644 index 000000000000000..cabff7e598eb02a --- /dev/null +++ b/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zicfiss -emit-llvm -o - %s -fcf-protection=return | FileCheck %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zicfiss -emit-llvm -o - %s | FileCheck -check-prefix=NOSHADOWSTACK %s +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zicfiss -emit-llvm -o - %s -fcf-protection=return | FileCheck %s +// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zicfiss -emit-llvm -o - %s | FileCheck -check-prefix=NOSHADOWSTACK %s + +int foo(int *a) { return *a; } + +// CHECK: attributes {{.*}}"hw-shadow-stack"{{.*}} +// NOSHADOWSTACK-NOT: attributes {{.*}}"hw-shadow-stack"{{.*}} From 7544d3af0e285ecd2fa28698621dd3125f749b2d Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Tue, 29 Oct 2024 07:57:34 +0000 Subject: [PATCH 259/425] [RISCV] Mark RVB23U64 and RVB23S64 as non-experimental (#113918) The specification was recently ratified . 
--- clang/test/Driver/print-supported-extensions-riscv.c | 4 ++-- clang/test/Driver/riscv-profiles.c | 4 ++-- llvm/docs/RISCVUsage.rst | 4 ++-- llvm/docs/ReleaseNotes.md | 3 ++- llvm/lib/Target/RISCV/RISCVProfiles.td | 4 ++-- llvm/test/CodeGen/RISCV/attributes.ll | 4 ++-- llvm/unittests/TargetParser/RISCVISAInfoTest.cpp | 4 ++-- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c index e39847b9c31a8ed..68acde65a74bfb6 100644 --- a/clang/test/Driver/print-supported-extensions-riscv.c +++ b/clang/test/Driver/print-supported-extensions-riscv.c @@ -193,12 +193,12 @@ // CHECK-NEXT: rva22u64 // CHECK-NEXT: rva23s64 // CHECK-NEXT: rva23u64 +// CHECK-NEXT: rvb23s64 +// CHECK-NEXT: rvb23u64 // CHECK-NEXT: rvi20u32 // CHECK-NEXT: rvi20u64 // CHECK-EMPTY: // CHECK-NEXT: Experimental Profiles -// CHECK-NEXT: rvb23s64 -// CHECK-NEXT: rvb23u64 // CHECK-NEXT: rvm23u32 // CHECK-EMPTY: // CHECK-NEXT: Use -march to specify the target's extension. 
diff --git a/clang/test/Driver/riscv-profiles.c b/clang/test/Driver/riscv-profiles.c index d85ac8baf4edd96..2b4d19422874cfb 100644 --- a/clang/test/Driver/riscv-profiles.c +++ b/clang/test/Driver/riscv-profiles.c @@ -207,7 +207,7 @@ // RVA23S64: "-target-feature" "+svnapot" // RVA23S64: "-target-feature" "+svpbmt" -// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23u64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23u64 \ // RUN: | FileCheck -check-prefix=RVB23U64 %s // RVB23U64: "-target-feature" "+m" // RVB23U64: "-target-feature" "+a" @@ -239,7 +239,7 @@ // RVB23U64: "-target-feature" "+zbs" // RVB23U64: "-target-feature" "+zkt" -// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23s64 -menable-experimental-extensions \ +// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23s64 \ // RUN: | FileCheck -check-prefix=RVB23S64 %s // RVB23S64: "-target-feature" "+m" // RVB23S64: "-target-feature" "+a" diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 04f2c357766d448..f6f2eb45c49c17f 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -84,6 +84,8 @@ ISA naming string. Currently supported profiles: * ``rva22s64`` * ``rva23u64`` * ``rva23s64`` +* ``rvb23u64`` +* ``rvb23s64`` Note that you can also append additional extension names to be enabled, e.g. ``rva20u64_zicond`` will enable the ``zicond`` extension in addition to those @@ -93,8 +95,6 @@ Profiles that are not yet ratified cannot be used unless ``-menable-experimental-extensions`` (or equivalent for other tools) is specified. This applies to the following profiles: -* ``rvb23u64`` -* ``rvb23s64`` * ``rvm23u32`` .. 
_riscv-extensions: diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index ac7a795daf791ab..92a45d845f1db8e 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -184,7 +184,8 @@ Changes to the RISC-V Backend * The `Smmpm`, `Smnpm`, `Ssnpm`, `Supm`, and `Sspm` pointer masking extensions are no longer marked as experimental. * The `Sha` extension is now supported. -* The RVA23U64 and RVA23S64 profiles are no longer marked as experimental. +* The RVA23U64, RVA23S64, RVB23U64, and RVB23S64 profiles are no longer marked + as experimental. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Target/RISCV/RISCVProfiles.td b/llvm/lib/Target/RISCV/RISCVProfiles.td index ea0fe08abd7a147..cbf2a2eddf38ed8 100644 --- a/llvm/lib/Target/RISCV/RISCVProfiles.td +++ b/llvm/lib/Target/RISCV/RISCVProfiles.td @@ -163,6 +163,6 @@ def RVA22U64 : RISCVProfile<"rva22u64", RVA22U64Features>; def RVA22S64 : RISCVProfile<"rva22s64", RVA22S64Features>; def RVA23U64 : RISCVProfile<"rva23u64", RVA23U64Features>; def RVA23S64 : RISCVProfile<"rva23s64", RVA23S64Features>; -def RVB23U64 : RISCVExperimentalProfile<"rvb23u64", RVB23U64Features>; -def RVB23S64 : RISCVExperimentalProfile<"rvb23s64", RVB23S64Features>; +def RVB23U64 : RISCVProfile<"rvb23u64", RVB23U64Features>; +def RVB23S64 : RISCVProfile<"rvb23s64", RVB23S64Features>; def RVM23U32 : RISCVExperimentalProfile<"rvm23u32", RVM23U32Features>; diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 2545c7075e4cf55..c03108c0617e75a 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -293,8 +293,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+rva22s64 %s -o - | FileCheck --check-prefix=RVA22S64 %s ; RUN: llc -mtriple=riscv64 -mattr=+rva23u64 %s -o - | FileCheck --check-prefix=RVA23U64 %s ; RUN: llc -mtriple=riscv64 -mattr=+rva23s64 %s -o - | FileCheck --check-prefix=RVA23S64 %s -; RUN: 
llc -mtriple=riscv64 -mattr=+experimental-rvb23u64 %s -o - | FileCheck --check-prefix=RVB23U64 %s -; RUN: llc -mtriple=riscv64 -mattr=+experimental-rvb23s64 %s -o - | FileCheck --check-prefix=RVB23S64 %s +; RUN: llc -mtriple=riscv64 -mattr=+rvb23u64 %s -o - | FileCheck --check-prefix=RVB23U64 %s +; RUN: llc -mtriple=riscv64 -mattr=+rvb23s64 %s -o - | FileCheck --check-prefix=RVB23S64 %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-rvm23u32 %s -o - | FileCheck --check-prefix=RVM23U32 %s ; CHECK: .attribute 4, 16 diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index 48792ad0265fc46..a1d493e12fda6df 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -1138,12 +1138,12 @@ Supported Profiles rva22u64 rva23s64 rva23u64 + rvb23s64 + rvb23u64 rvi20u32 rvi20u64 Experimental Profiles - rvb23s64 - rvb23u64 rvm23u32 Use -march to specify the target's extension. From d4197f3ac1bbdd0665599bf3843e865d13af18ab Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Tue, 29 Oct 2024 09:09:13 +0000 Subject: [PATCH 260/425] [LLVM][AArch64] Add assembly/disassembly for MUL/BFMUL SME instructions (#113535) According to https://developer.arm.com/documentation/ddi0602 Co-authored-by: Momchil-Velikov Momchil.Velikov@arm.com --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 110 ++++++++ llvm/test/MC/AArch64/SME2/bfmul-diagnostics.s | 111 ++++++++ llvm/test/MC/AArch64/SME2/bfmul.s | 92 ++++++ .../test/MC/AArch64/SME2p2/fmul-diagnostics.s | 112 ++++++++ llvm/test/MC/AArch64/SME2p2/fmul.s | 261 ++++++++++++++++++ 6 files changed, 696 insertions(+) create mode 100644 llvm/test/MC/AArch64/SME2/bfmul-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/bfmul.s create mode 100644 llvm/test/MC/AArch64/SME2p2/fmul-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/fmul.s diff --git 
a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index b763aa15a7c3f15..76e0501a5cc233e 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1007,6 +1007,11 @@ let Predicates = [HasSME2p2] in { defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a">; defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s">; + + defm FMUL_2ZZ : sme2_multi2_fmul_sm<"fmul">; + defm FMUL_2Z2Z : sme2_multi2_fmul_mm< "fmul">; + defm FMUL_4ZZ : sme2_multi4_fmul_sm<"fmul">; + defm FMUL_4Z4Z : sme2_multi4_fmul_mm< "fmul">; } // [HasSME2p2] let Predicates = [HasSME2p2, HasSMEB16B16] in { @@ -1024,3 +1029,8 @@ let Predicates = [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR] in { let Predicates = [HasSME2p2, HasSMEF16F16] in { def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">; } // [HasSME2p2, HasSMEF16F16] + +let Predicates = [HasSME2, HasSVEBFSCALE] in { + defm BFMUL : sme2_bfmul_single<"bfmul">; + defm BFMUL : sme2_bfmul_multi<"bfmul">; +} //[HasSME2, HasSVEBFSCALE] diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 4cfe18eddf481cb..a05c5206320f714 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5225,3 +5225,113 @@ multiclass sme2_bfmop4as_widening { // Multiple vectors def _M2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; } + +class sme2_multi2_fmul_sm size, string mnemonic, RegisterOperand vector_ty, RegisterOperand zpr_ty> + : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, zpr_ty:$Zm), + mnemonic, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<4> Zd; + bits<4> Zn; + bits<4> Zm; + + let Inst{31-24} = 0b11000001; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-17} = Zm; + let Inst{16-10} = 0b0111010; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-1} = Zd; + let Inst{0} = 
0b0; +} + +multiclass sme2_multi2_fmul_sm { + def _H : sme2_multi2_fmul_sm<0b01, mnemonic, ZZ_h_mul_r, ZPR4b16>; + def _S : sme2_multi2_fmul_sm<0b10, mnemonic, ZZ_s_mul_r, ZPR4b32>; + def _D : sme2_multi2_fmul_sm<0b11, mnemonic, ZZ_d_mul_r, ZPR4b64>; +} + +class sme2_multi4_fmul_sm size, string mnemonic, RegisterOperand vector_ty, RegisterOperand zpr_ty> + : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, zpr_ty:$Zm), + mnemonic, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<3> Zd; + bits<3> Zn; + bits<4> Zm; + + let Inst{31-24} = 0b11000001; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-17} = Zm; + let Inst{16-10} = 0b1111010; + let Inst{9-7} = Zn; + let Inst{6-5} = 0b00; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +multiclass sme2_multi4_fmul_sm { + def _H : sme2_multi4_fmul_sm<0b01, mnemonic, ZZZZ_h_mul_r, ZPR4b16>; + def _S : sme2_multi4_fmul_sm<0b10, mnemonic, ZZZZ_s_mul_r, ZPR4b32>; + def _D : sme2_multi4_fmul_sm<0b11, mnemonic, ZZZZ_d_mul_r, ZPR4b64>; +} + +multiclass sme2_bfmul_single { + def _2ZZ : sme2_multi2_fmul_sm<0b00, mnemonic, ZZ_h_mul_r, ZPR4b16>; + def _4ZZ : sme2_multi4_fmul_sm<0b00, mnemonic, ZZZZ_h_mul_r, ZPR4b16>; +} + +class sme2_multi2_fmul_mm size, string mnemonic, RegisterOperand vector_ty> + : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm), + mnemonic, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<4> Zd; + bits<4> Zn; + bits<4> Zm; + + let Inst{31-24} = 0b11000001; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-17} = Zm; + let Inst{16-10} = 0b0111001; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; +} + +multiclass sme2_multi2_fmul_mm { + def _H : sme2_multi2_fmul_mm<0b01, mnemonic, ZZ_h_mul_r>; + def _S : sme2_multi2_fmul_mm<0b10, mnemonic, ZZ_s_mul_r>; + def _D : sme2_multi2_fmul_mm<0b11, mnemonic, ZZ_d_mul_r>; +} + +class sme2_multi4_fmul_mm size, string mnemonic, RegisterOperand vector_ty> + : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, 
vector_ty:$Zm), + mnemonic, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<3> Zd; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-24} = 0b11000001; + let Inst{23-22} = size; + let Inst{21} = 0b1; + let Inst{20-18} = Zm; + let Inst{17-10} = 0b01111001; + let Inst{9-7} = Zn; + let Inst{6-5} = 0b00; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +multiclass sme2_multi4_fmul_mm { + def _H : sme2_multi4_fmul_mm<0b01, mnemonic, ZZZZ_h_mul_r>; + def _S : sme2_multi4_fmul_mm<0b10, mnemonic, ZZZZ_s_mul_r>; + def _D : sme2_multi4_fmul_mm<0b11, mnemonic, ZZZZ_d_mul_r>; +} + +multiclass sme2_bfmul_multi { + def _2Z2Z : sme2_multi2_fmul_mm<0b00, mnemonic, ZZ_h_mul_r>; + def _4Z4Z : sme2_multi4_fmul_mm<0b00, mnemonic, ZZZZ_h_mul_r>; +} diff --git a/llvm/test/MC/AArch64/SME2/bfmul-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmul-diagnostics.s new file mode 100644 index 000000000000000..c28cc5cd426dda5 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bfmul-diagnostics.s @@ -0,0 +1,111 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2,+sve-bfscale 2>&1 < %s| FileCheck %s + +// Multiple and single, 2 regs + +bfmul {z0.s-z1.s}, {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z1.h-z2.h}, {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +bfmul {z0.h-z2.h}, {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z0.h-z1.h}, {z0.s-z1.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z0.h-z1.h}, {z1.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +bfmul {z0.h-z1.h}, {z0.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction + +bfmul {z0.h-z1.h}, {z0.h-z1.h}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +bfmul {z0.h-z1.h}, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +// Multiple and single, 4 regs + +bfmul {z0.s-z3.s}, {z0.h-z3.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z1.h-z4.h}, {z0.h-z3.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types + +bfmul {z0.h-z4.h}, {z0.h-z3.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +bfmul {z0.h-z3.h}, {z0.s-z3.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z0.h-z3.h}, {z1.h-z4.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types + +bfmul {z0.h-z3.h}, {z0.h-z4.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +bfmul {z0.h-z3.h}, {z0.h-z3.h}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +bfmul {z0.h-z3.h}, {z0.h-z3.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +// Multiple, 2 regs + +bfmul {z0.s-z1.s}, {z0.h-z1.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z1.h-z2.h}, {z0.h-z1.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +bfmul {z0.h-z2.h}, {z0.h-z1.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul 
{z0.h-z1.h}, {z0.s-z1.s}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z0.h-z1.h}, {z1.h-z2.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +bfmul {z0.h-z1.h}, {z0.h-z2.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z0.h-z1.h}, {z0.h-z1.h}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z0.h-z1.h}, {z0.h-z1.h}, {z1.h-z2.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +bfmul {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z2.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +// Multiple, 4 regs + +bfmul {z0.s-z3.s}, {z0.h-z3.h}, {z0.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z1.h-z4.h}, {z0.h-z3.h}, {z0.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types + +bfmul {z0.h-z4.h}, {z0.h-z3.h}, {z0.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +bfmul {z0.h-z3.h}, {z0.s-z3.s}, {z0.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmul {z0.h-z3.h}, {z1.h-z4.h}, {z0.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types + +bfmul {z0.h-z3.h}, {z0.h-z4.h}, {z0.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +bfmul {z0.h-z3.h}, {z0.h-z3.h}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction + +bfmul {z0.h-z3.h}, {z0.h-z3.h}, {z1.h-z4.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types + +bfmul {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z4.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors diff --git a/llvm/test/MC/AArch64/SME2/bfmul.s b/llvm/test/MC/AArch64/SME2/bfmul.s new file mode 100644 index 000000000000000..10a43848c738199 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bfmul.s @@ -0,0 +1,92 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sve-bfscale < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sve-bfscale - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sve-bfscale < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sve-bfscale -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// Multiple and single, 2 regs + +bfmul {z0.h-z1.h}, {z0.h-z1.h}, z0.h // 11000001-00100000-11101000-00000000 +// CHECK-INST: bfmul { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0xe8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c120e800 + +bfmul {z20.h-z21.h}, {z10.h-z11.h}, z10.h // 11000001-00110100-11101001-01010100 +// CHECK-INST: bfmul { z20.h, z21.h }, { z10.h, z11.h }, z10.h +// CHECK-ENCODING: [0x54,0xe9,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c134e954 + +bfmul {z30.h-z31.h}, {z30.h-z31.h}, z15.h // 11000001-00111110-11101011-11011110 +// CHECK-INST: bfmul { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0xde,0xeb,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c13eebde + +// Multiple and single, 4 regs + +bfmul {z0.h-z3.h}, {z0.h-z3.h}, z0.h // 11000001-00100001-11101000-00000000 +// CHECK-INST: bfmul { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0xe8,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c121e800 + +bfmul {z20.h-z23.h}, {z8.h-z11.h}, z10.h // 11000001-00110101-11101001-00010100 +// CHECK-INST: bfmul { z20.h - z23.h }, { z8.h - z11.h }, z10.h +// CHECK-ENCODING: [0x14,0xe9,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c135e914 + +bfmul {z28.h-z31.h}, {z28.h-z31.h}, z15.h // 11000001-00111111-11101011-10011100 +// CHECK-INST: bfmul { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x9c,0xeb,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c13feb9c + +// 
Multiple, 2 regs +bfmul {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h} // 11000001-00100000-11100100-00000000 +// CHECK-INST: bfmul { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xe4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c120e400 + +bfmul {z20.h-z21.h}, {z10.h-z11.h}, {z20.h-z21.h} // 11000001-00110100-11100101-01010100 +// CHECK-INST: bfmul { z20.h, z21.h }, { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x54,0xe5,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c134e554 + +bfmul {z30.h-z31.h}, {z30.h-z31.h}, {z30.h-z31.h} // 11000001-00111110-11100111-11011110 +// CHECK-INST: bfmul { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xde,0xe7,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c13ee7de + +// Multiple, 4 regs + +bfmul {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h} // 11000001-00100001-11100100-00000000 +// CHECK-INST: bfmul { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0xe4,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c121e400 + +bfmul {z20.h-z23.h}, {z8.h-z11.h}, {z20.h-z23.h} // 11000001-00110101-11100101-00010100 +// CHECK-INST: bfmul { z20.h - z23.h }, { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x14,0xe5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c135e514 + +bfmul {z28.h-z31.h}, {z28.h-z31.h}, {z28.h-z31.h} // 11000001-00111101-11100111-10011100 +// CHECK-INST: bfmul { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9c,0xe7,0x3d,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c13de79c diff --git a/llvm/test/MC/AArch64/SME2p2/fmul-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmul-diagnostics.s new file mode 100644 index 000000000000000..2fdd3f82adc1ddd --- /dev/null +++ 
b/llvm/test/MC/AArch64/SME2p2/fmul-diagnostics.s @@ -0,0 +1,112 @@ + +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 2>&1 < %s| FileCheck %s + +// Multiple and single, 2 regs + +fmul {z0.b-z1.b}, {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z1.s-z2.s}, {z0.s-z1.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +fmul {z0.d-z2.d}, {z0.d-z1.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.h-z1.h}, {z0.b-z1.b}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.s-z1.s}, {z1.s-z2.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +fmul {z0.d-z1.d}, {z0.d-z2.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.h-z1.h}, {z0.h-z1.h}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +fmul {z0.s-z1.s}, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.s..z15.s + +// Multiple and single, 4 regs + +fmul {z0.b-z3.b}, {z0.h-z3.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z1.s-z3.s}, {z0.h-z3.h}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.d-z4.d}, {z0.d-z3.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +fmul {z0.h-z3.h}, {z0.b-z3.b}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.s-z3.s}, {z1.s-z3.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.d-z3.d}, {z0.d-z4.d}, z0.d 
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +fmul {z0.h-z3.h}, {z0.h-z3.h}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +fmul {z0.s-z3.s}, {z0.s-z3.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.s..z15.s + +// Multiple, 2 regs + +fmul {z0.b-z1.b}, {z0.h-z1.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z1.s-z2.s}, {z0.s-z1.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +fmul {z0.d-z2.d}, {z0.d-z1.d}, {z0.d-z1.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.h-z1.h}, {z0.b-z1.b}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.s-z1.s}, {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +fmul {z0.d-z1.d}, {z0.d-z2.d}, {z0.d-z1.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.h-z1.h}, {z0.h-z1.h}, {z0.b-z1.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.s-z1.s}, {z0.s-z1.s}, {z1.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +fmul {z0.d-z1.d}, {z0.d-z1.d}, {z0.d-z2.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +// Multiple, 4 regs + +fmul {z0.b-z3.b}, {z0.h-z3.h}, {z0.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z1.s-z3.s}, {z0.s-z3.s}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction + +fmul {z0.d-z4.d}, {z0.d-z3.d}, {z0.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +fmul {z0.h-z3.h}, {z0.b-z3.b}, {z0.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.s-z3.s}, {z1.s-z3.s}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.d-z3.d}, {z0.d-z4.d}, {z0.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +fmul {z0.h-z3.h}, {z0.h-z3.h}, {z0.b-z3.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.s-z3.s}, {z0.s-z3.s}, {z1.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmul {z0.d-z3.d}, {z0.d-z3.d}, {z0.d-z4.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors diff --git a/llvm/test/MC/AArch64/SME2p2/fmul.s b/llvm/test/MC/AArch64/SME2p2/fmul.s new file mode 100644 index 000000000000000..ec6f523867cef51 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmul.s @@ -0,0 +1,261 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// Multiple and single, 2 regs + +// 16-bit elements + +fmul {z0.h-z1.h}, {z0.h-z1.h}, z0.h // 11000001-01100000-11101000-00000000 +// CHECK-INST: fmul { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0xe8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c160e800 + +fmul {z20.h-z21.h}, {z10.h-z11.h}, z10.h // 11000001-01110100-11101001-01010100 +// CHECK-INST: fmul { z20.h, z21.h }, { z10.h, z11.h }, z10.h +// CHECK-ENCODING: [0x54,0xe9,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c174e954 + +fmul {z30.h-z31.h}, {z30.h-z31.h}, z15.h // 11000001-01111110-11101011-11011110 +// CHECK-INST: fmul { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0xde,0xeb,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c17eebde + +// 32-bit elements + +fmul {z0.s-z1.s}, {z0.s-z1.s}, z0.s // 11000001-10100000-11101000-00000000 +// CHECK-INST: fmul { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x00,0xe8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1a0e800 + +fmul {z20.s-z21.s}, {z10.s-z11.s}, z10.s // 11000001-10110100-11101001-01010100 +// CHECK-INST: fmul { z20.s, z21.s }, { z10.s, z11.s }, z10.s +// CHECK-ENCODING: [0x54,0xe9,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1b4e954 + +fmul {z30.s-z31.s}, {z30.s-z31.s}, z15.s // 11000001-10111110-11101011-11011110 +// CHECK-INST: fmul { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0xde,0xeb,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1beebde + +// 64-bit elements + +fmul {z0.d-z1.d}, {z0.d-z1.d}, z0.d // 
11000001-11100000-11101000-00000000 +// CHECK-INST: fmul { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x00,0xe8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1e0e800 + +fmul {z20.d-z21.d}, {z10.d-z11.d}, z10.d // 11000001-11110100-11101001-01010100 +// CHECK-INST: fmul { z20.d, z21.d }, { z10.d, z11.d }, z10.d +// CHECK-ENCODING: [0x54,0xe9,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1f4e954 + +fmul {z30.d-z31.d}, {z30.d-z31.d}, z15.d // 11000001-11111110-11101011-11011110 +// CHECK-INST: fmul { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0xde,0xeb,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1feebde + +// Multiple and single, 4 regs + +// 16-bit elements + +fmul {z0.h-z3.h}, {z0.h-z3.h}, z0.h // 11000001-01100001-11101000-00000000 +// CHECK-INST: fmul { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0xe8,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c161e800 + +fmul {z20.h-z23.h}, {z8.h-z11.h}, z10.h // 11000001-01110101-11101001-00010100 +// CHECK-INST: fmul { z20.h - z23.h }, { z8.h - z11.h }, z10.h +// CHECK-ENCODING: [0x14,0xe9,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c175e914 + +fmul {z28.h-z31.h}, {z28.h-z31.h}, z15.h // 11000001-01111111-11101011-10011100 +// CHECK-INST: fmul { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x9c,0xeb,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c17feb9c + +// 32-bit elements + +fmul {z0.s-z3.s}, {z0.s-z3.s}, z0.s // 11000001-10100001-11101000-00000000 +// CHECK-INST: fmul { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x00,0xe8,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1a1e800 + +fmul {z20.s-z23.s}, {z8.s-z11.s}, z10.s // 11000001-10110101-11101001-00010100 +// CHECK-INST: fmul { z20.s - z23.s }, { z8.s - z11.s }, z10.s +// 
CHECK-ENCODING: [0x14,0xe9,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1b5e914 + +fmul {z28.s-z31.s}, {z28.s-z31.s}, z15.s // 11000001-10111111-11101011-10011100 +// CHECK-INST: fmul { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x9c,0xeb,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1bfeb9c + +// 64-bit elements + +fmul {z0.d-z3.d}, {z0.d-z3.d}, z0.d // 11000001-11100001-11101000-00000000 +// CHECK-INST: fmul { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x00,0xe8,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1e1e800 + +fmul {z20.d-z23.d}, {z8.d-z11.d}, z10.d // 11000001-11110101-11101001-00010100 +// CHECK-INST: fmul { z20.d - z23.d }, { z8.d - z11.d }, z10.d +// CHECK-ENCODING: [0x14,0xe9,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1f5e914 + +fmul {z28.d-z31.d}, {z28.d-z31.d}, z15.d // 11000001-11111111-11101011-10011100 +// CHECK-INST: fmul { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x9c,0xeb,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1ffeb9c + +// Multiple, 2 regs + +// 16-bit elements + +fmul {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h} // 11000001-01100000-11100100-00000000 +// CHECK-INST: fmul { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xe4,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c160e400 + +fmul {z20.h-z21.h}, {z10.h-z11.h}, {z20.h-z21.h} // 11000001-01110100-11100101-01010100 +// CHECK-INST: fmul { z20.h, z21.h }, { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x54,0xe5,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c174e554 + +fmul {z30.h-z31.h}, {z30.h-z31.h}, {z30.h-z31.h} // 11000001-01111110-11100111-11011110 +// CHECK-INST: fmul { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xde,0xe7,0x7e,0xc1] +// CHECK-ERROR: 
instruction requires: sme2p2 +// CHECK-UNKNOWN: c17ee7de + +// 32-bit elements + +fmul {z0.s-z1.s}, {z0.s-z1.s}, {z0.s-z1.s} // 11000001-10100000-11100100-00000000 +// CHECK-INST: fmul { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe4,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1a0e400 + +fmul {z20.s-z21.s}, {z10.s-z11.s}, {z20.s-z21.s} // 11000001-10110100-11100101-01010100 +// CHECK-INST: fmul { z20.s, z21.s }, { z10.s, z11.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x54,0xe5,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1b4e554 + +fmul {z30.s-z31.s}, {z30.s-z31.s}, {z30.s-z31.s} // 11000001-10111110-11100111-11011110 +// CHECK-INST: fmul { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe7,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1bee7de + +// 64-bit elements + +fmul {z0.d-z1.d}, {z0.d-z1.d}, {z0.d-z1.d} // 11000001-11100000-11100100-00000000 +// CHECK-INST: fmul { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0xe4,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1e0e400 + +fmul {z20.d-z21.d}, {z10.d-z11.d}, {z20.d-z21.d} // 11000001-11110100-11100101-01010100 +// CHECK-INST: fmul { z20.d, z21.d }, { z10.d, z11.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x54,0xe5,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1f4e554 + +fmul {z30.d-z31.d}, {z30.d-z31.d}, {z30.d-z31.d} // 11000001-11111110-11100111-11011110 +// CHECK-INST: fmul { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0xde,0xe7,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1fee7de + +// Multiple, 4 regs + +// 16-bit elements + +fmul {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h} // 11000001-01100001-11100100-00000000 +// CHECK-INST: fmul { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: 
[0x00,0xe4,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c161e400 + +fmul {z20.h-z23.h}, {z8.h-z11.h}, {z20.h-z23.h} // 11000001-01110101-11100101-00010100 +// CHECK-INST: fmul { z20.h - z23.h }, { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x14,0xe5,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c175e514 + +fmul {z28.h-z31.h}, {z28.h-z31.h}, {z28.h-z31.h} // 11000001-01111101-11100111-10011100 +// CHECK-INST: fmul { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9c,0xe7,0x7d,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c17de79c + +// 32-bit elements + +fmul {z0.s-z3.s}, {z0.s-z3.s}, {z0.s-z3.s} // 11000001-10100001-11100100-00000000 +// CHECK-INST: fmul { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe4,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1a1e400 + +fmul {z20.s-z23.s}, {z8.s-z11.s}, {z20.s-z23.s} // 11000001-10110101-11100101-00010100 +// CHECK-INST: fmul { z20.s - z23.s }, { z8.s - z11.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x14,0xe5,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1b5e514 + +fmul {z28.s-z31.s}, {z28.s-z31.s}, {z28.s-z31.s} // 11000001-10111101-11100111-10011100 +// CHECK-INST: fmul { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe7,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1bde79c + +// 64-bit elements + +fmul {z0.d-z3.d}, {z0.d-z3.d}, {z0.d-z3.d} // 11000001-11100001-11100100-00000000 +// CHECK-INST: fmul { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0xe4,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1e1e400 + +fmul {z20.d-z23.d}, {z8.d-z11.d}, {z20.d-z23.d} // 11000001-11110101-11100101-00010100 +// CHECK-INST: fmul { z20.d - z23.d }, { z8.d - z11.d }, { z20.d - z23.d } +// 
CHECK-ENCODING: [0x14,0xe5,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1f5e514 + +fmul {z28.d-z31.d}, {z28.d-z31.d}, {z28.d-z31.d} // 11000001-11111101-11100111-10011100 +// CHECK-INST: fmul { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9c,0xe7,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: c1fde79c From 8d38fbf2f027c72332c8ba03ff0ff0f83b4dcf02 Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Tue, 29 Oct 2024 09:09:55 +0000 Subject: [PATCH 261/425] =?UTF-8?q?[LLVM][AArch64]=20Add=20assembly/disass?= =?UTF-8?q?embly=20for=20SVE=20Integer=20Unary=20Arithm=E2=80=A6=20(#11367?= =?UTF-8?q?0)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …etic Predicated instructions This patch adds the following instructions: SVE bitwise unary operations (predicated) CLS, CLZ, CNT, CNOT, FABS, FNEG, NOT SVE integer unary operations (predicated) SXT{B,H,W}, UXT{B,H,W}, ABS ,NEG SVE2 integer unary operations (predicated) URECPE, URSQRTE, SQABS, SQNEG According to https://developer.arm.com/documentation/ddi0602 Co-authored-by: Spencer Abson Spencer.Abson@arm.com --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 53 ++-- llvm/lib/Target/AArch64/SVEInstrFormats.td | 76 +++++- ...unary_arithmetic_predicated_z-diagnotics.s | 249 ++++++++++++++++++ .../SVE2p2/unary_arithmetic_predicated_z.s | 225 ++++++++++++++++ 4 files changed, 576 insertions(+), 27 deletions(-) create mode 100644 llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z-diagnotics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z.s diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 11c64df2eb9278e..2b69903b133fe3b 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -647,22 +647,22 @@ let Predicates = [HasSVEorSME] in { defm 
SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>; defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>; - defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxt_mt>; - defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxt_mt>; - defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxt_mt>; - defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>; - defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>; - defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>; - defm ABS_ZPmZ : sve_int_un_pred_arit_0< 0b110, "abs", AArch64abs_mt>; - defm NEG_ZPmZ : sve_int_un_pred_arit_0< 0b111, "neg", AArch64neg_mt>; - - defm CLS_ZPmZ : sve_int_un_pred_arit_1< 0b000, "cls", AArch64cls_mt>; - defm CLZ_ZPmZ : sve_int_un_pred_arit_1< 0b001, "clz", AArch64clz_mt>; - defm CNT_ZPmZ : sve_int_un_pred_arit_1< 0b010, "cnt", AArch64cnt_mt>; - defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", AArch64cnot_mt>; - defm NOT_ZPmZ : sve_int_un_pred_arit_1< 0b110, "not", AArch64not_mt>; - defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>; - defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>; + defm SXTB_ZPmZ : sve_int_un_pred_arit_h<0b000, "sxtb", AArch64sxt_mt>; + defm UXTB_ZPmZ : sve_int_un_pred_arit_h<0b001, "uxtb", AArch64uxt_mt>; + defm SXTH_ZPmZ : sve_int_un_pred_arit_w<0b010, "sxth", AArch64sxt_mt>; + defm UXTH_ZPmZ : sve_int_un_pred_arit_w<0b011, "uxth", AArch64uxt_mt>; + defm SXTW_ZPmZ : sve_int_un_pred_arit_d<0b100, "sxtw", AArch64sxt_mt>; + defm UXTW_ZPmZ : sve_int_un_pred_arit_d<0b101, "uxtw", AArch64uxt_mt>; + defm ABS_ZPmZ : sve_int_un_pred_arit< 0b110, "abs", AArch64abs_mt>; + defm NEG_ZPmZ : sve_int_un_pred_arit< 0b111, "neg", AArch64neg_mt>; + + defm CLS_ZPmZ : sve_int_un_pred_arit_bitwise< 0b000, "cls", AArch64cls_mt>; + defm CLZ_ZPmZ : sve_int_un_pred_arit_bitwise< 0b001, 
"clz", AArch64clz_mt>; + defm CNT_ZPmZ : sve_int_un_pred_arit_bitwise< 0b010, "cnt", AArch64cnt_mt>; + defm CNOT_ZPmZ : sve_int_un_pred_arit_bitwise< 0b011, "cnot", AArch64cnot_mt>; + defm NOT_ZPmZ : sve_int_un_pred_arit_bitwise< 0b110, "not", AArch64not_mt>; + defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>; + defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>; foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in { // No dedicated instruction, so just clear the sign bit. @@ -4271,6 +4271,27 @@ let Predicates = [HasSVE2p2orSME2p2] in { // Floating-point square root, zeroing predicate defm FSQRT_ZPZz : sve_fp_z2op_p_zd_hsd<0b01101, "fsqrt">; + // SVE2p2 integer unary arithmetic (bitwise), zeroing predicate + defm CLS_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b000, "cls">; + defm CLZ_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b001, "clz">; + defm CNT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b010, "cnt">; + defm CNOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b011, "cnot">; + defm NOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b110, "not">; + + // floating point + defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs">; + defm FNEG_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b101, "fneg">; + + // SVE2p2 integer unary arithmetic, zeroing predicate + defm SXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b000, "sxtb">; + defm UXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b001, "uxtb">; + defm SXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b010, "sxth">; + defm UXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b011, "uxth">; + defm ABS_ZPzZ : sve_int_un_pred_arit_z< 0b110, "abs">; + defm NEG_ZPzZ : sve_int_un_pred_arit_z< 0b111, "neg">; + def SXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1000, "sxtw", ZPR64>; + def UXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1010, "uxtw", ZPR64>; + } // End HasSME2p2orSVE2p2 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td 
b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 72cbad17bc049f6..31312e00b919e23 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4685,8 +4685,30 @@ class sve_int_un_pred_arit sz8_64, bits<4> opc, let hasSideEffects = 0; } -multiclass sve_int_un_pred_arit_0 opc, string asm, - SDPatternOperator op> { +class sve_int_un_pred_arit_z sz8_64, bits<4> opc, + string asm, ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg/z, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zd; + bits<5> Zn; + let Inst{31-24} = 0b00000100; + let Inst{23-22} = sz8_64; + let Inst{21-20} = 0b00; + let Inst{19} = opc{0}; + let Inst{18-16} = opc{3-1}; + let Inst{15-13} = 0b101; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let hasSideEffects = 0; +} + +multiclass sve_int_un_pred_arit opc, string asm, + SDPatternOperator op> { def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>, SVEPseudo2Instr; def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>, @@ -4712,8 +4734,15 @@ multiclass sve_int_un_pred_arit_0 opc, string asm, defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } -multiclass sve_int_un_pred_arit_0_h opc, string asm, - SDPatternOperator op> { +multiclass sve_int_un_pred_arit_z opc, string asm> { + def _B : sve_int_un_pred_arit_z<0b00, { opc, 0b0 }, asm, ZPR8>; + def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b0 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_h opc, string asm, + SDPatternOperator op> { def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>, SVEPseudo2Instr; def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>, @@ -4734,8 +4763,14 @@ multiclass sve_int_un_pred_arit_0_h opc, string asm, defm : SVE_InReg_Extend_PassthruUndef(NAME # _D_UNDEF)>; } -multiclass 
sve_int_un_pred_arit_0_w opc, string asm, - SDPatternOperator op> { +multiclass sve_int_un_pred_arit_h_z opc, string asm> { + def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b0 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_w opc, string asm, + SDPatternOperator op> { def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>, SVEPseudo2Instr; def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, @@ -4751,8 +4786,13 @@ multiclass sve_int_un_pred_arit_0_w opc, string asm, defm : SVE_InReg_Extend_PassthruUndef(NAME # _D_UNDEF)>; } -multiclass sve_int_un_pred_arit_0_d opc, string asm, - SDPatternOperator op> { +multiclass sve_int_un_pred_arit_w_z opc, string asm> { + def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_d opc, string asm, + SDPatternOperator op> { def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>, SVEPseudo2Instr; @@ -4763,8 +4803,8 @@ multiclass sve_int_un_pred_arit_0_d opc, string asm, defm : SVE_InReg_Extend_PassthruUndef(NAME # _D_UNDEF)>; } -multiclass sve_int_un_pred_arit_1 opc, string asm, - SDPatternOperator op> { +multiclass sve_int_un_pred_arit_bitwise opc, string asm, + SDPatternOperator op> { def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>, SVEPseudo2Instr; def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>, @@ -4790,7 +4830,15 @@ multiclass sve_int_un_pred_arit_1 opc, string asm, defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } -multiclass sve_int_un_pred_arit_1_fp opc, string asm, SDPatternOperator op> { +multiclass sve_int_un_pred_arit_bitwise_z opc, string asm> { + def _B : sve_int_un_pred_arit_z<0b00, { opc, 0b1 }, asm, ZPR8>; + def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit_z<0b10, 
{ opc, 0b1 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>; +} + +multiclass sve_int_un_pred_arit_bitwise_fp opc, string asm, + SDPatternOperator op> { def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>, SVEPseudo2Instr; def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>, @@ -4817,6 +4865,12 @@ multiclass sve_int_un_pred_arit_1_fp opc, string asm, SDPatternOperator defm : SVE_1_Op_PassthruUndef_Pat(NAME # _D_UNDEF)>; } +multiclass sve_int_un_pred_arit_bitwise_fp_z opc, string asm> { + def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>; + def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>; + def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>; +} + //===----------------------------------------------------------------------===// // SVE Integer Wide Immediate - Unpredicated Group //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z-diagnotics.s b/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z-diagnotics.s new file mode 100644 index 000000000000000..b44039fae464a97 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z-diagnotics.s @@ -0,0 +1,249 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid element width +abs z31.b, p7/z, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: abs z31.b, p7/z, z31.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cls z31.d, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cls z31.d, p7/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +clz z31.d, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: clz z31.d, p7/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cnot 
z31.b, p7/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cnot z31.b, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cnt z31.d, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: cnt z31.d, p7/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fabs z31.h, p7/z, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fabs z31.h, p7/z, z31.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fneg z31.d, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fneg z31.d, p7/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +neg z31.s, p7/z, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: neg z31.s, p7/z, z31.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +not z31.b, p7/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: not z31.b, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sxtb z31.h, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sxtb z31.h, p7/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sxth z31.s, p7/z, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sxth z31.s, p7/z, z31.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sxtw z31.d, p7/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sxtw z31.d, p7/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uxtb z31.s, p7/z, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: uxtb z31.s, p7/z, z31.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uxth z31.d, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: uxth z31.d, p7/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uxtw z31.d, p7/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: uxtw z31.d, p7/z, z31.s +// 
CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid predicate + +abs z31.s, p8/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: abs z31.s, p8/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cls z31.b, p8/z, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: cls z31.b, p8/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +clz z31.b, p8/z, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: clz z31.b, p8/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cnot z31.b, p8/z, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: cnot z31.b, p8/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +cnt z31.b, p8/z, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: cnt z31.b, p8/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fabs z31.h, p8/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: fabs z31.h, p8/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fneg z31.h, p8/z, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: fneg z31.h, p8/z, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +neg z31.s, p8/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: neg z31.s, p8/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +not z31.b, p8/z, z31.b +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: not z31.b, p8/z, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sxtb z31.s, p8/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: sxtb z31.s, p8/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sxth z0.s, p8/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: sxth z0.s, p8/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sxtw z0.d, p8/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: sxtw z0.d, p8/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uxtb z31.s, p8/z, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: uxtb z31.s, p8/z, z31.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uxth z0.s, p8/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: uxth z0.s, p8/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uxtw z0.d, p8/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: uxtw z0.d, p8/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.d, p0/z, z7.d +abs z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: abs z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +cls z0.h, p0/z, z3.h +// 
CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: cls z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.h, p0/z, z7.h +clz z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: clz z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +cnot z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: cnot z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.h, p0/z, z7.h +cnt z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: cnt z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +fabs z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fabs z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.h, p0/z, z7.h +fneg z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fneg z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +neg z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: neg z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.h, p0/z, z7.h +not z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: not z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx 
z0, z7 +sxtb z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: sxtb z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.s, p0/z, z7.s +sxth z0.s, p0/z, z3.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: sxth z0.s, p0/z, z3.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +sxtw z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: sxtw z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +uxtb z0.h, p0/z, z3.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: uxtb z0.h, p0/z, z3.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0.s, p0/z, z7.s +uxth z0.s, p0/z, z3.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: uxth z0.s, p0/z, z3.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +uxtw z0.d, p0/z, z3.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: uxtw z0.d, p0/z, z3.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z.s b/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z.s new file mode 100644 index 000000000000000..c460602f31ae644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z.s @@ -0,0 +1,225 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// ABS + +abs z0.b, p0/z, z0.b // 00000100-00000110-10100000-00000000 +// CHECK-INST: abs z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x06,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 0406a000 + +abs z31.d, p7/z, z31.d // 00000100-11000110-10111111-11111111 +// CHECK-INST: abs z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc6,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c6bfff + + +// CLS + +cls z0.b, p0/z, z0.b // 00000100-00001000-10100000-00000000 +// CHECK-INST: cls z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x08,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 0408a000 + +cls z31.d, p7/z, z31.d // 00000100-11001000-10111111-11111111 +// CHECK-INST: cls z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc8,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c8bfff + +// CLZ + +clz z0.b, p0/z, z0.b // 00000100-00001001-10100000-00000000 +// CHECK-INST: clz z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x09,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or 
sve2p2 +// CHECK-UNKNOWN: 0409a000 + +clz z31.d, p7/z, z31.d // 00000100-11001001-10111111-11111111 +// CHECK-INST: clz z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc9,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c9bfff + +// CNOT + +cnot z0.b, p0/z, z0.b // 00000100-00001011-10100000-00000000 +// CHECK-INST: cnot z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x0b,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 040ba000 + +cnot z31.d, p7/z, z31.d // 00000100-11001011-10111111-11111111 +// CHECK-INST: cnot z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xcb,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04cbbfff + +// CNT + +cnt z0.b, p0/z, z0.b // 00000100-00001010-10100000-00000000 +// CHECK-INST: cnt z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x0a,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 040aa000 + +cnt z31.d, p7/z, z31.d // 00000100-11001010-10111111-11111111 +// CHECK-INST: cnt z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xca,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04cabfff + + +// FABS + +fabs z0.h, p0/z, z0.h // 00000100-01001100-10100000-00000000 +// CHECK-INST: fabs z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xa0,0x4c,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 044ca000 + +fabs z31.d, p7/z, z31.d // 00000100-11001100-10111111-11111111 +// CHECK-INST: fabs z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xcc,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04ccbfff + +// FNEG + +fneg z0.h, p0/z, z0.h // 00000100-01001101-10100000-00000000 +// CHECK-INST: fneg z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xa0,0x4d,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 044da000 + +fneg z31.d, p7/z, z31.d // 00000100-11001101-10111111-11111111 +// CHECK-INST: 
fneg z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xcd,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04cdbfff + +// NEG + +neg z0.b, p0/z, z0.b // 00000100-00000111-10100000-00000000 +// CHECK-INST: neg z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x07,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 0407a000 + +neg z31.d, p7/z, z31.d // 00000100-11000111-10111111-11111111 +// CHECK-INST: neg z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc7,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c7bfff + +//NOT + +not z0.b, p0/z, z0.b // 00000100-00001110-10100000-00000000 +// CHECK-INST: not z0.b, p0/z, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x0e,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 040ea000 + +not z31.d, p7/z, z31.d // 00000100-11001110-10111111-11111111 +// CHECK-INST: not z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xce,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04cebfff + +// SXTB + +sxtb z0.h, p0/z, z0.h // 00000100-01000000-10100000-00000000 +// CHECK-INST: sxtb z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xa0,0x40,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 0440a000 + +sxtb z31.d, p7/z, z31.d // 00000100-11000000-10111111-11111111 +// CHECK-INST: sxtb z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc0,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c0bfff + +// SXTH + +sxth z0.s, p0/z, z0.s // 00000100-10000010-10100000-00000000 +// CHECK-INST: sxth z0.s, p0/z, z0.s +// CHECK-ENCODING: [0x00,0xa0,0x82,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 0482a000 + +sxth z31.d, p7/z, z31.d // 00000100-11000010-10111111-11111111 +// CHECK-INST: sxth z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc2,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// 
CHECK-UNKNOWN: 04c2bfff + +// SXTW + +sxtw z0.d, p0/z, z0.d // 00000100-11000100-10100000-00000000 +// CHECK-INST: sxtw z0.d, p0/z, z0.d +// CHECK-ENCODING: [0x00,0xa0,0xc4,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c4a000 + +sxtw z31.d, p7/z, z31.d // 00000100-11000100-10111111-11111111 +// CHECK-INST: sxtw z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc4,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c4bfff + +// UXTB + +uxtb z0.h, p0/z, z0.h // 00000100-01000001-10100000-00000000 +// CHECK-INST: uxtb z0.h, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xa0,0x41,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 0441a000 + +uxtb z31.d, p7/z, z31.d // 00000100-11000001-10111111-11111111 +// CHECK-INST: uxtb z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc1,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c1bfff + +uxth z0.s, p0/z, z0.s // 00000100-10000011-10100000-00000000 +// CHECK-INST: uxth z0.s, p0/z, z0.s +// CHECK-ENCODING: [0x00,0xa0,0x83,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 0483a000 + +uxth z31.d, p7/z, z31.d // 00000100-11000011-10111111-11111111 +// CHECK-INST: uxth z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc3,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c3bfff + +// UXTW + +uxtw z0.d, p0/z, z0.d // 00000100-11000101-10100000-00000000 +// CHECK-INST: uxtw z0.d, p0/z, z0.d +// CHECK-ENCODING: [0x00,0xa0,0xc5,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c5a000 + +uxtw z31.d, p7/z, z31.d // 00000100-11000101-10111111-11111111 +// CHECK-INST: uxtw z31.d, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xbf,0xc5,0x04] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 04c5bfff From 05b6c2e4b933e7a3606899c72067c92b6077287b Mon Sep 17 00:00:00 2001 From: "Oleksandr T." 
Date: Tue, 29 Oct 2024 11:44:01 +0200 Subject: [PATCH 262/425] [Clang] fix range calculation for conditionals with throw expressions (#112081) Fixes #111854 --- The issue arises when `GetExprRange` encounters a `ConditionalOperator` with a `CXXThrowExpr` ```md ConditionalOperator 0x1108658e0 'int' |-CXXBoolLiteralExpr 0x110865878 '_Bool' true |-CXXThrowExpr 0x1108658a8 'void' | `-IntegerLiteral 0x110865888 'int' 0 `-IntegerLiteral 0x1108658c0 'int' 0 ``` https://github.com/llvm/llvm-project/blob/ed3d05178274890fb804f43ae1bcdfd33b5fd8f0/clang/lib/Sema/SemaChecking.cpp#L9628-L9631 The current behavior causes the `GetExprRange` to proceed with the throw expression (`CO->getTrueExpr()`/`void` type) --- clang/docs/ReleaseNotes.rst | 3 +- clang/lib/Sema/SemaChecking.cpp | 216 ++++++++++++++---------- clang/test/SemaCXX/conditional-expr.cpp | 7 + 3 files changed, 140 insertions(+), 86 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 424f02ef08d70ea..a39ffc8366dda41 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -571,7 +571,8 @@ Bug Fixes to C++ Support - Clang incorrectly considered a class with an anonymous union member to not be const-default-constructible even if a union member has a default member initializer. (#GH95854). -- Fixed an assertion failure when evaluating an invalid expression in an array initializer (#GH112140) +- Fixed an assertion failure when evaluating an invalid expression in an array initializer. (#GH112140) +- Fixed an assertion failure in range calculations for conditional throw expressions. 
(#GH111854) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index d027e4c6dfdb4dd..3308b898a5b68f4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -9592,16 +9592,23 @@ static QualType GetExprType(const Expr *E) { return Ty; } -/// Pseudo-evaluate the given integer expression, estimating the -/// range of values it might take. +/// Attempts to estimate an approximate range for the given integer expression. +/// Returns a range if successful, otherwise it returns \c std::nullopt if a +/// reliable estimation cannot be determined. /// /// \param MaxWidth The width to which the value will be truncated. -/// \param Approximate If \c true, return a likely range for the result: in -/// particular, assume that arithmetic on narrower types doesn't leave -/// those types. If \c false, return a range including all possible -/// result values. -static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, - bool InConstantContext, bool Approximate) { +/// \param InConstantContext If \c true, interpret the expression within a +/// constant context. +/// \param Approximate If \c true, provide a likely range of values by assuming +/// that arithmetic on narrower types remains within those types. +/// If \c false, return a range that includes all possible values +/// resulting from the expression. +/// \returns A range of values that the expression might take, or +/// std::nullopt if a reliable estimation cannot be determined. +static std::optional TryGetExprRange(ASTContext &C, const Expr *E, + unsigned MaxWidth, + bool InConstantContext, + bool Approximate) { E = E->IgnoreParens(); // Try a full evaluation first. @@ -9614,8 +9621,8 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, // being of the new, wider type. 
if (const auto *CE = dyn_cast(E)) { if (CE->getCastKind() == CK_NoOp || CE->getCastKind() == CK_LValueToRValue) - return GetExprRange(C, CE->getSubExpr(), MaxWidth, InConstantContext, - Approximate); + return TryGetExprRange(C, CE->getSubExpr(), MaxWidth, InConstantContext, + Approximate); IntRange OutputTypeRange = IntRange::forValueOfType(C, GetExprType(CE)); @@ -9626,40 +9633,52 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, if (!isIntegerCast) return OutputTypeRange; - IntRange SubRange = GetExprRange(C, CE->getSubExpr(), - std::min(MaxWidth, OutputTypeRange.Width), - InConstantContext, Approximate); + std::optional SubRange = TryGetExprRange( + C, CE->getSubExpr(), std::min(MaxWidth, OutputTypeRange.Width), + InConstantContext, Approximate); + if (!SubRange) + return std::nullopt; // Bail out if the subexpr's range is as wide as the cast type. - if (SubRange.Width >= OutputTypeRange.Width) + if (SubRange->Width >= OutputTypeRange.Width) return OutputTypeRange; // Otherwise, we take the smaller width, and we're non-negative if // either the output type or the subexpr is. - return IntRange(SubRange.Width, - SubRange.NonNegative || OutputTypeRange.NonNegative); + return IntRange(SubRange->Width, + SubRange->NonNegative || OutputTypeRange.NonNegative); } if (const auto *CO = dyn_cast(E)) { // If we can fold the condition, just take that operand. bool CondResult; if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C)) - return GetExprRange(C, - CondResult ? CO->getTrueExpr() : CO->getFalseExpr(), - MaxWidth, InConstantContext, Approximate); + return TryGetExprRange( + C, CondResult ? CO->getTrueExpr() : CO->getFalseExpr(), MaxWidth, + InConstantContext, Approximate); // Otherwise, conservatively merge. - // GetExprRange requires an integer expression, but a throw expression + // TryGetExprRange requires an integer expression, but a throw expression // results in a void type. 
- Expr *E = CO->getTrueExpr(); - IntRange L = E->getType()->isVoidType() - ? IntRange{0, true} - : GetExprRange(C, E, MaxWidth, InConstantContext, Approximate); - E = CO->getFalseExpr(); - IntRange R = E->getType()->isVoidType() - ? IntRange{0, true} - : GetExprRange(C, E, MaxWidth, InConstantContext, Approximate); - return IntRange::join(L, R); + Expr *TrueExpr = CO->getTrueExpr(); + if (TrueExpr->getType()->isVoidType()) + return std::nullopt; + + std::optional L = + TryGetExprRange(C, TrueExpr, MaxWidth, InConstantContext, Approximate); + if (!L) + return std::nullopt; + + Expr *FalseExpr = CO->getFalseExpr(); + if (FalseExpr->getType()->isVoidType()) + return std::nullopt; + + std::optional R = + TryGetExprRange(C, FalseExpr, MaxWidth, InConstantContext, Approximate); + if (!R) + return std::nullopt; + + return IntRange::join(*L, *R); } if (const auto *BO = dyn_cast(E)) { @@ -9696,8 +9715,8 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, // been coerced to the LHS type. case BO_Assign: // TODO: bitfields? - return GetExprRange(C, BO->getRHS(), MaxWidth, InConstantContext, - Approximate); + return TryGetExprRange(C, BO->getRHS(), MaxWidth, InConstantContext, + Approximate); // Operations with opaque sources are black-listed. case BO_PtrMemD: @@ -9729,18 +9748,20 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, // Right shift by a constant can narrow its left argument. case BO_Shr: case BO_ShrAssign: { - IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth, InConstantContext, - Approximate); + std::optional L = TryGetExprRange( + C, BO->getLHS(), MaxWidth, InConstantContext, Approximate); + if (!L) + return std::nullopt; // If the shift amount is a positive constant, drop the width by // that much. if (std::optional shift = BO->getRHS()->getIntegerConstantExpr(C)) { if (shift->isNonNegative()) { - if (shift->uge(L.Width)) - L.Width = (L.NonNegative ? 
0 : 1); + if (shift->uge(L->Width)) + L->Width = (L->NonNegative ? 0 : 1); else - L.Width -= shift->getZExtValue(); + L->Width -= shift->getZExtValue(); } } @@ -9749,8 +9770,8 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, // Comma acts as its right operand. case BO_Comma: - return GetExprRange(C, BO->getRHS(), MaxWidth, InConstantContext, - Approximate); + return TryGetExprRange(C, BO->getRHS(), MaxWidth, InConstantContext, + Approximate); case BO_Add: if (!Approximate) @@ -9774,26 +9795,31 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, case BO_Div: { // Don't 'pre-truncate' the operands. unsigned opWidth = C.getIntWidth(GetExprType(E)); - IntRange L = GetExprRange(C, BO->getLHS(), opWidth, InConstantContext, - Approximate); + std::optional L = TryGetExprRange( + C, BO->getLHS(), opWidth, InConstantContext, Approximate); + if (!L) + return std::nullopt; // If the divisor is constant, use that. if (std::optional divisor = BO->getRHS()->getIntegerConstantExpr(C)) { unsigned log2 = divisor->logBase2(); // floor(log_2(divisor)) - if (log2 >= L.Width) - L.Width = (L.NonNegative ? 0 : 1); + if (log2 >= L->Width) + L->Width = (L->NonNegative ? 0 : 1); else - L.Width = std::min(L.Width - log2, MaxWidth); + L->Width = std::min(L->Width - log2, MaxWidth); return L; } // Otherwise, just use the LHS's width. // FIXME: This is wrong if the LHS could be its minimal value and the RHS // could be -1. - IntRange R = GetExprRange(C, BO->getRHS(), opWidth, InConstantContext, - Approximate); - return IntRange(L.Width, L.NonNegative && R.NonNegative); + std::optional R = TryGetExprRange( + C, BO->getRHS(), opWidth, InConstantContext, Approximate); + if (!R) + return std::nullopt; + + return IntRange(L->Width, L->NonNegative && R->NonNegative); } case BO_Rem: @@ -9810,11 +9836,17 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, // performed the computation. 
QualType T = GetExprType(E); unsigned opWidth = C.getIntWidth(T); - IntRange L = - GetExprRange(C, BO->getLHS(), opWidth, InConstantContext, Approximate); - IntRange R = - GetExprRange(C, BO->getRHS(), opWidth, InConstantContext, Approximate); - IntRange C = Combine(L, R); + std::optional L = TryGetExprRange(C, BO->getLHS(), opWidth, + InConstantContext, Approximate); + if (!L) + return std::nullopt; + + std::optional R = TryGetExprRange(C, BO->getRHS(), opWidth, + InConstantContext, Approximate); + if (!R) + return std::nullopt; + + IntRange C = Combine(*L, *R); C.NonNegative |= T->isUnsignedIntegerOrEnumerationType(); C.Width = std::min(C.Width, MaxWidth); return C; @@ -9832,26 +9864,30 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, return IntRange::forValueOfType(C, GetExprType(E)); default: - return GetExprRange(C, UO->getSubExpr(), MaxWidth, InConstantContext, - Approximate); + return TryGetExprRange(C, UO->getSubExpr(), MaxWidth, InConstantContext, + Approximate); } } if (const auto *OVE = dyn_cast(E)) - return GetExprRange(C, OVE->getSourceExpr(), MaxWidth, InConstantContext, - Approximate); + return TryGetExprRange(C, OVE->getSourceExpr(), MaxWidth, InConstantContext, + Approximate); if (const auto *BitField = E->getSourceBitField()) return IntRange(BitField->getBitWidthValue(C), BitField->getType()->isUnsignedIntegerOrEnumerationType()); + if (GetExprType(E)->isVoidType()) + return std::nullopt; + return IntRange::forValueOfType(C, GetExprType(E)); } -static IntRange GetExprRange(ASTContext &C, const Expr *E, - bool InConstantContext, bool Approximate) { - return GetExprRange(C, E, C.getIntWidth(GetExprType(E)), InConstantContext, - Approximate); +static std::optional TryGetExprRange(ASTContext &C, const Expr *E, + bool InConstantContext, + bool Approximate) { + return TryGetExprRange(C, E, C.getIntWidth(GetExprType(E)), InConstantContext, + Approximate); } /// Checks whether the given value, which currently has the given 
@@ -10096,8 +10132,10 @@ static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E, S.Context.hasSameUnqualifiedType(Constant->getType(), Other->getType())) return false; - IntRange OtherValueRange = GetExprRange( + std::optional OtherValueRange = TryGetExprRange( S.Context, Other, S.isConstantEvaluatedContext(), /*Approximate=*/false); + if (!OtherValueRange) + return false; QualType OtherT = Other->getType(); if (const auto *AT = OtherT->getAs()) @@ -10115,11 +10153,11 @@ static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E, bool OtherIsBooleanDespiteType = !OtherT->isBooleanType() && Other->isKnownToHaveBooleanValue(); if (OtherIsBooleanDespiteType || IsObjCSignedCharBool) - OtherTypeRange = OtherValueRange = IntRange::forBoolType(); + OtherTypeRange = *OtherValueRange = IntRange::forBoolType(); // Check if all values in the range of possible values of this expression // lead to the same comparison outcome. - PromotedRange OtherPromotedValueRange(OtherValueRange, Value.getBitWidth(), + PromotedRange OtherPromotedValueRange(*OtherValueRange, Value.getBitWidth(), Value.isUnsigned()); auto Cmp = OtherPromotedValueRange.compare(Value); auto Result = PromotedRange::constantValue(E->getOpcode(), Cmp, RhsConstant); @@ -10143,7 +10181,7 @@ static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E, // Don't warn if the non-constant operand actually always evaluates to the // same value. 
- if (!TautologicalTypeCompare && OtherValueRange.Width == 0) + if (!TautologicalTypeCompare && OtherValueRange->Width == 0) return false; // Suppress the diagnostic for an in-range comparison if the constant comes @@ -10182,7 +10220,7 @@ static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E, if (!TautologicalTypeCompare) { S.Diag(E->getOperatorLoc(), diag::warn_tautological_compare_value_range) - << RhsConstant << OtherValueRange.Width << OtherValueRange.NonNegative + << RhsConstant << OtherValueRange->Width << OtherValueRange->NonNegative << E->getOpcodeStr() << OS.str() << *Result << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange(); return true; @@ -10312,9 +10350,11 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) { } // Otherwise, calculate the effective range of the signed operand. - IntRange signedRange = - GetExprRange(S.Context, signedOperand, S.isConstantEvaluatedContext(), - /*Approximate=*/true); + std::optional signedRange = + TryGetExprRange(S.Context, signedOperand, S.isConstantEvaluatedContext(), + /*Approximate=*/true); + if (!signedRange) + return; // Go ahead and analyze implicit conversions in the operands. Note // that we skip the implicit conversions on both sides. @@ -10322,7 +10362,7 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) { AnalyzeImplicitConversions(S, RHS, E->getOperatorLoc()); // If the signed range is non-negative, -Wsign-compare won't fire. - if (signedRange.NonNegative) + if (signedRange->NonNegative) return; // For (in)equality comparisons, if the unsigned operand is a @@ -10331,15 +10371,17 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) { // change the result of the comparison. 
if (E->isEqualityOp()) { unsigned comparisonWidth = S.Context.getIntWidth(T); - IntRange unsignedRange = - GetExprRange(S.Context, unsignedOperand, S.isConstantEvaluatedContext(), - /*Approximate=*/true); + std::optional unsignedRange = TryGetExprRange( + S.Context, unsignedOperand, S.isConstantEvaluatedContext(), + /*Approximate=*/true); + if (!unsignedRange) + return; // We should never be unable to prove that the unsigned operand is // non-negative. - assert(unsignedRange.NonNegative && "unsigned range includes negative?"); + assert(unsignedRange->NonNegative && "unsigned range includes negative?"); - if (unsignedRange.Width < comparisonWidth) + if (unsignedRange->Width < comparisonWidth) return; } @@ -11146,10 +11188,12 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC, if (SourceBT && TargetBT && SourceBT->isIntegerType() && TargetBT->isFloatingType() && !IsListInit) { // Determine the number of precision bits in the source integer type. - IntRange SourceRange = - GetExprRange(Context, E, isConstantEvaluatedContext(), - /*Approximate=*/true); - unsigned int SourcePrecision = SourceRange.Width; + std::optional SourceRange = + TryGetExprRange(Context, E, isConstantEvaluatedContext(), + /*Approximate=*/true); + if (!SourceRange) + return; + unsigned int SourcePrecision = SourceRange->Width; // Determine the number of precision bits in the // target floating point type. 
@@ -11212,14 +11256,16 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC, E, Diag(CC, diag::warn_impcast_int_to_objc_signed_char_bool) << E->getType()); } + std::optional LikelySourceRange = TryGetExprRange( + Context, E, isConstantEvaluatedContext(), /*Approximate=*/true); + if (!LikelySourceRange) + return; IntRange SourceTypeRange = IntRange::forTargetOfCanonicalType(Context, Source); - IntRange LikelySourceRange = GetExprRange( - Context, E, isConstantEvaluatedContext(), /*Approximate=*/true); IntRange TargetRange = IntRange::forTargetOfCanonicalType(Context, Target); - if (LikelySourceRange.Width > TargetRange.Width) { + if (LikelySourceRange->Width > TargetRange.Width) { // If the source is a constant, use a default-on diagnostic. // TODO: this should happen for bitfield stores, too. Expr::EvalResult Result; @@ -11266,8 +11312,8 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC, } } - if (TargetRange.Width == LikelySourceRange.Width && - !TargetRange.NonNegative && LikelySourceRange.NonNegative && + if (TargetRange.Width == LikelySourceRange->Width && + !TargetRange.NonNegative && LikelySourceRange->NonNegative && Source->isSignedIntegerType()) { // Warn when doing a signed to signed conversion, warn if the positive // source value is exactly the width of the target type, which will @@ -11293,9 +11339,9 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC, } if ((!isa(Target) || !isa(Source)) && - ((TargetRange.NonNegative && !LikelySourceRange.NonNegative) || - (!TargetRange.NonNegative && LikelySourceRange.NonNegative && - LikelySourceRange.Width == TargetRange.Width))) { + ((TargetRange.NonNegative && !LikelySourceRange->NonNegative) || + (!TargetRange.NonNegative && LikelySourceRange->NonNegative && + LikelySourceRange->Width == TargetRange.Width))) { if (SourceMgr.isInSystemMacro(CC)) return; diff --git a/clang/test/SemaCXX/conditional-expr.cpp 
b/clang/test/SemaCXX/conditional-expr.cpp index 01effaa189322bc..8f17555fd806ff8 100644 --- a/clang/test/SemaCXX/conditional-expr.cpp +++ b/clang/test/SemaCXX/conditional-expr.cpp @@ -429,3 +429,10 @@ void g() { long e = a = b ? throw 0 : throw 1; } } // namespace PR46484 + +namespace GH111854 { +void f() { + (true ? throw 0 : 0) <= 0; // expected-warning {{relational comparison result unused}} + (false ? 0 : throw 0) <= 0; // expected-warning {{relational comparison result unused}} +} +} From dfb60bb9193d78d0980193e1ade715cffbb55af8 Mon Sep 17 00:00:00 2001 From: Rohit Aggarwal <44664450+rohitaggarwal007@users.noreply.github.com> Date: Tue, 29 Oct 2024 15:39:55 +0530 Subject: [PATCH 263/425] Adding more vector calls for -fveclib=AMDLIBM (#109662) AMD has it's own implementation of vector calls. New vector calls are introduced in the library for exp10, log10, sincos and finite asin/acos Please refer [https://github.com/amd/aocl-libm-ose] --------- Co-authored-by: Rohit Aggarwal --- .../include/llvm/Analysis/TargetLibraryInfo.h | 1 + .../llvm/Analysis/TargetTransformInfoImpl.h | 3 +- llvm/include/llvm/Analysis/VecFuncs.def | 24 ++ llvm/lib/Analysis/ValueTracking.cpp | 4 + llvm/lib/Analysis/VectorUtils.cpp | 1 + .../LoopVectorize/X86/amdlibm-calls-finite.ll | 275 ++++++++++++------ .../LoopVectorize/X86/amdlibm-calls.ll | 216 ++++++++++++++ llvm/test/Transforms/Util/add-TLI-mappings.ll | 27 +- 8 files changed, 455 insertions(+), 96 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index aeb8de3973f7321..5347c64e43e718f 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -416,6 +416,7 @@ class TargetLibraryInfo { case LibFunc_cos: case LibFunc_cosf: case LibFunc_cosl: case LibFunc_cosh: case LibFunc_coshf: case LibFunc_coshl: case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l: + case LibFunc_exp10: case LibFunc_exp10f: case 
LibFunc_exp10l: case LibFunc_fabs: case LibFunc_fabsf: case LibFunc_fabsl: case LibFunc_floor: case LibFunc_floorf: case LibFunc_floorl: case LibFunc_fmax: case LibFunc_fmaxf: case LibFunc_fmaxl: diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 0b7792f89a05c43..317c13917c0cfc8 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -177,7 +177,8 @@ class TargetTransformInfoImplBase { Name == "sinh" || Name == "sinhf" || Name == "sinhl" || Name == "cosh" || Name == "coshf" || Name == "coshl" || Name == "tanh" || Name == "tanhf" || Name == "tanhl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") + Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" || + Name == "exp10" || Name == "exp10l" || Name == "exp10f") return false; // clang-format on // These are all likely to be optimized into something smaller. diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index c4586894e3e490b..71ad3a35eb3f5e0 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -1328,14 +1328,17 @@ TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd2_log2", FIXED(2), NOMASK, "_ZGV_LLV TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd4_log2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd8_log2", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("log10", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("log10f", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("log10f", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("log10f", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__log10_finite", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs16_log10f", FIXED(16), NOMASK, 
"_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("llvm.log10.f64", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") @@ -1350,6 +1353,12 @@ TLI_DEFINE_VECFUNC("erf", "amd_vrd8_erf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("exp10", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("exp10f", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__exp10_finite", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("__exp10f_finite", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + +TLI_DEFINE_VECFUNC("llvm.exp10.f64", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") +TLI_DEFINE_VECFUNC("llvm.exp10.f32", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") + TLI_DEFINE_VECFUNC("expm1", "amd_vrd2_expm1", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("expm1f", "amd_vrs4_expm1f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") @@ -1380,10 +1389,19 @@ TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LL TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__asin_finite", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs16_asinf", FIXED(16), 
NOMASK, "_ZGV_LLVM_N16v") + TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") TLI_DEFINE_VECFUNC("acosf", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("acosf", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") +TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") +TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") + TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v") TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v") TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") @@ -1421,6 +1439,12 @@ TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_ TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") +TLI_DEFINE_VECFUNC("sincos", "amd_vrd4_sincos", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl8l8") +TLI_DEFINE_VECFUNC("sincos", "amd_vrd8_sincos", FIXED(8), NOMASK, "_ZGV_LLVM_N8vl8l8") + +TLI_DEFINE_VECFUNC("sincosf", "amd_vrs4_sincosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") +TLI_DEFINE_VECFUNC("sincosf", "amd_vrs8_sincosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8vl4l4") +TLI_DEFINE_VECFUNC("sincosf", "amd_vrs16_sincosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16vl4l4") #else #error "Must choose which vector library functions are to be defined." 
#endif diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index e9ed8b3c862b555..aa5142f33624099 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4241,6 +4241,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB, case LibFunc_exp2f: case LibFunc_exp2l: return Intrinsic::exp2; + case LibFunc_exp10: + case LibFunc_exp10f: + case LibFunc_exp10l: + return Intrinsic::exp10; case LibFunc_log: case LibFunc_logf: case LibFunc_logl: diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 37c443011719b62..cd5cf0443541fc8 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -76,6 +76,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { case Intrinsic::cosh: case Intrinsic::tanh: case Intrinsic::exp: + case Intrinsic::exp10: case Intrinsic::exp2: case Intrinsic::log: case Intrinsic::log10: diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll index 54bb9352f3c89c6..9899eded7380864 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll @@ -7,12 +7,10 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -declare float @__expf_finite(float) #0 - +define void @exp_f32(ptr nocapture %varray) { ; CHECK-LABEL: @exp_f32 ; CHECK: <4 x float> @amd_vrs4_expf ; CHECK: ret -define void @exp_f32(ptr nocapture %varray) { entry: br label %for.body @@ -25,23 +23,16 @@ for.body: ; preds = %for.body, %entry store float %call, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = 
%for.body ret void } -!1 = distinct !{!1, !2, !3} -!2 = !{!"llvm.loop.vectorize.width", i32 4} -!3 = !{!"llvm.loop.vectorize.enable", i1 true} - - -declare double @__exp_finite(double) #0 - +define void @exp_f64(ptr nocapture %varray) { ; CHECK-LABEL: @exp_f64 ; CHECK: <4 x double> @amd_vrd4_exp ; CHECK: ret -define void @exp_f64(ptr nocapture %varray) { entry: br label %for.body @@ -54,25 +45,16 @@ for.body: ; preds = %for.body, %entry store double %call, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body ret void } -!11 = distinct !{!11, !12, !13} -!12 = !{!"llvm.loop.vectorize.width", i32 4} -!13 = !{!"llvm.loop.vectorize.enable", i1 true} - - - - -declare float @__logf_finite(float) #0 - +define void @log_f32(ptr nocapture %varray) { ; CHECK-LABEL: @log_f32 ; CHECK: <4 x float> @amd_vrs4_logf ; CHECK: ret -define void @log_f32(ptr nocapture %varray) { entry: br label %for.body @@ -85,23 +67,16 @@ for.body: ; preds = %for.body, %entry store float %call, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body ret void } -!21 = distinct !{!21, !22, !23} -!22 = !{!"llvm.loop.vectorize.width", i32 4} -!23 = !{!"llvm.loop.vectorize.enable", i1 true} - - -declare double @__log_finite(double) #0 - +define void @log_f64(ptr nocapture %varray) { ; CHECK-LABEL: @log_f64 ; CHECK: <4 x double> @amd_vrd4_log ; CHECK: ret -define void @log_f64(ptr nocapture %varray) { entry: br label %for.body @@ -114,23 +89,16 @@ for.body: ; preds = %for.body, %entry store double %call, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 
%indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body ret void } -!31 = distinct !{!31, !32, !33} -!32 = !{!"llvm.loop.vectorize.width", i32 4} -!33 = !{!"llvm.loop.vectorize.enable", i1 true} - - -declare float @__powf_finite(float, float) #0 - +define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) { ; CHECK-LABEL: @pow_f32 ; CHECK: <4 x float> @amd_vrs4_powf ; CHECK: ret -define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) { entry: br label %for.body @@ -145,23 +113,16 @@ for.body: ; preds = %for.body, %entry store float %tmp2, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body ret void } -!41 = distinct !{!41, !42, !43} -!42 = !{!"llvm.loop.vectorize.width", i32 4} -!43 = !{!"llvm.loop.vectorize.enable", i1 true} - - -declare double @__pow_finite(double, double) #0 - +define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) { ; CHECK-LABEL: @pow_f64 ; CHECK: <4 x double> @amd_vrd4_pow ; CHECK: ret -define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) { entry: br label %for.body @@ -176,18 +137,12 @@ for.body: ; preds = %for.body, %entry store double %tmp2, ptr %arrayidx2, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body ret void } -!51 = distinct !{!51, !52, !53} -!52 = !{!"llvm.loop.vectorize.width", i32 4} -!53 = !{!"llvm.loop.vectorize.enable", i1 true} - -declare float 
@__exp2f_finite(float) #0 - define void @exp2f_finite(ptr nocapture %varray) { ; CHECK-LABEL: @exp2f_finite( ; CHECK: call <4 x float> @amd_vrs4_exp2f(<4 x float> %{{.*}}) @@ -205,18 +160,12 @@ for.body: store float %call, ptr %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ret void } -!61 = distinct !{!61, !62, !63} -!62 = !{!"llvm.loop.vectorize.width", i32 4} -!63 = !{!"llvm.loop.vectorize.enable", i1 true} - -declare double @__exp2_finite(double) #0 - define void @exp2_finite(ptr nocapture %varray) { ; CHECK-LABEL: @exp2_finite( ; CHECK: call <4 x double> @amd_vrd4_exp2(<4 x double> {{.*}}) @@ -234,22 +183,16 @@ for.body: store double %call, ptr %arrayidx, align 4 %iv.next = add nuw nsw i64 %iv, 1 %exitcond = icmp eq i64 %iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ret void } -!71 = distinct !{!71, !72, !73} -!72 = !{!"llvm.loop.vectorize.width", i32 4} -!73 = !{!"llvm.loop.vectorize.enable", i1 true} - -declare float @__log2f_finite(float) #0 - +define void @log2_f32(ptr nocapture %varray) { ; CHECK-LABEL: @log2_f32 ; CHECK: <4 x float> @amd_vrs4_log2f ; CHECK: ret -define void @log2_f32(ptr nocapture %varray) { entry: br label %for.body @@ -262,23 +205,16 @@ for.body: ; preds = %for.body, %entry store float %call, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body ret void } -!81 = distinct !{!21, !22, !23} -!82 = !{!"llvm.loop.vectorize.width", i32 4} -!83 = !{!"llvm.loop.vectorize.enable", i1 true} - - -declare double 
@__log2_finite(double) #0 - +define void @log2_f64(ptr nocapture %varray) { ; CHECK-LABEL: @log2_f64 ; CHECK: <4 x double> @amd_vrd4_log2 ; CHECK: ret -define void @log2_f64(ptr nocapture %varray) { entry: br label %for.body @@ -291,22 +227,16 @@ for.body: ; preds = %for.body, %entry store double %call, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body ret void } -!91 = distinct !{!31, !32, !33} -!92 = !{!"llvm.loop.vectorize.width", i32 4} -!93 = !{!"llvm.loop.vectorize.enable", i1 true} - -declare float @__log10f_finite(float) #0 - +define void @log10_f32(ptr nocapture %varray) { ; CHECK-LABEL: @log10_f32 ; CHECK: <4 x float> @amd_vrs4_log10f ; CHECK: ret -define void @log10_f32(ptr nocapture %varray) { entry: br label %for.body @@ -319,14 +249,173 @@ for.body: ; preds = %for.body, %entry store float %call, ptr %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, 1000 - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 + +for.end: ; preds = %for.body + ret void +} + +define void @log10_finite(ptr nocapture %varray) { +; CHECK-LABEL: @log10_finite( +; CHECK: call <2 x double> @amd_vrd2_log10(<2 x double> {{.*}}) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @__log10_finite(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + 
+for.end: + ret void +} + +define void @exp10_finite(ptr nocapture %varray) { +; CHECK-LABEL: @exp10_finite( +; CHECK: call <2 x double> @amd_vrd2_exp10(<2 x double> {{.*}}) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @__exp10_finite(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1 + +for.end: + ret void +} + +define void @exp10_f32(ptr nocapture %varray) { +; CHECK-LABEL: @exp10_f32 +; CHECK: <4 x float> @amd_vrs4_exp10f +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__exp10f_finite(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 for.end: ; preds = %for.body ret void } -!101 = distinct !{!21, !22, !23} -!102 = !{!"llvm.loop.vectorize.width", i32 4} -!103 = !{!"llvm.loop.vectorize.enable", i1 true} +define void @asin_finite(ptr nocapture %varray) { +; CHECK-LABEL: @asin_finite( +; CHECK: call <8 x double> @amd_vrd8_asin(<8 x double> {{.*}}) +; CHECK: ret void +; +entry: + br label %for.body +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @__asin_finite(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, 
i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7 + +for.end: + ret void +} +define void @asinf_finite(ptr nocapture %varray) { +; CHECK-LABEL: @asinf_finite +; CHECK: <4 x float> @amd_vrs4_asinf +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__asinf_finite(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 + +for.end: ; preds = %for.body + ret void +} + +define void @acosf_finite(ptr nocapture %varray) { +; CHECK-LABEL: @acosf_finite +; CHECK: <4 x float> @amd_vrs4_acosf +; CHECK: ret +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call fast float @__acosf_finite(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv + store float %call, ptr %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4 + +for.end: ; preds = %for.body + ret void +} + +!1 = distinct !{!1, !2, !3} +!2 = !{!"llvm.loop.vectorize.width", i32 2} +!3 = !{!"llvm.loop.vectorize.enable", i1 true} + +!4 = distinct !{!4, !5, !6} +!5 = !{!"llvm.loop.vectorize.width", i32 4} +!6 = !{!"llvm.loop.vectorize.enable", i1 true} + +!7 = distinct !{!7, !8, !9} +!8 = 
!{!"llvm.loop.vectorize.width", i32 8} +!9 = !{!"llvm.loop.vectorize.enable", i1 true} + +declare float @__expf_finite(float) #0 +declare double @__exp_finite(double) #0 +declare double @__log_finite(double) #0 +declare float @__logf_finite(float) #0 +declare float @__powf_finite(float, float) #0 +declare double @__pow_finite(double, double) #0 +declare float @__exp2f_finite(float) #0 +declare double @__exp2_finite(double) #0 +declare float @__log2f_finite(float) #0 +declare double @__log2_finite(double) #0 +declare float @__log10f_finite(float) #0 +declare double @__log10_finite(double) #0 +declare double @__exp10_finite(double) #0 +declare float @__exp10f_finite(float) #0 +declare double @__asin_finite(double) #0 +declare float @__asinf_finite(float) #0 +declare float @__acosf_finite(float) #0 \ No newline at end of file diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll index 4acc7fe7eaccf61..4ced0372e5da381 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll @@ -1444,6 +1444,32 @@ for.end: ret void } +define void @log10_f64(ptr nocapture %varray) { +; CHECK-LABEL: @log10_f64( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]]) +; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]]) +; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @log10(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 
1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @log10_f32(ptr nocapture %varray) { ; CHECK-LABEL: @log10_f32( ; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]]) @@ -1470,6 +1496,32 @@ for.end: ret void } +define void @log10_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @log10_f64_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]]) +; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]]) +; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @llvm.log10.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + define void @log10_f32_intrinsic(ptr nocapture %varray) { ; CHECK-LABEL: @log10_f32_intrinsic( ; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]]) @@ -1600,4 +1652,168 @@ for.end: ret void } +define void @exp10_f64(ptr nocapture %varray) { +; CHECK-LABEL: @exp10_f64( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]]) +; CHECK-VF4: call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]]) +; CHECK-VF8: call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]]) +; CHECK-VF16: call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: 
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to double + %call = tail call double @exp10(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp10_f32(ptr nocapture %varray) { +; CHECK-LABEL: @exp10_f32( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]]) +; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]]) +; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @exp10f(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp10_f64_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @exp10_f64_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]]) +; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]]) +; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = 
sitofp i32 %tmp to double + %call = tail call double @llvm.exp10.f64(double %conv) + %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv + store double %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @exp10_f32_intrinsic(ptr nocapture %varray) { +; CHECK-LABEL: @exp10_f32_intrinsic( +; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]]) +; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]]) +; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]]) +; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %tmp = trunc i64 %iv to i32 + %conv = sitofp i32 %tmp to float + %call = tail call float @llvm.exp10.f32(float %conv) + %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv + store float %call, ptr %arrayidx, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 1000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + + +define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; CHECK-LABEL: define void @sincos_f64 +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) +; CHECK-VF2-NOT: call void @amd_vrd2_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; CHECK-VF4-NOT: call void @amd_vrd4_sincos(<4 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; CHECK-VF8-NOT: call void @amd_vrd8_sincos(<8 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = 
getelementptr double, ptr %a, i64 %indvars.iv + %num = load double, ptr %gepa, align 8 + %gepb = getelementptr double, ptr %b, i64 %indvars.iv + %gepc = getelementptr double, ptr %c, i64 %indvars.iv + call void @sincos(double %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) { +; CHECK-LABEL: define void @sincos_f32 +; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) +; CHECK-VF4-NOT: call void @amd_vrs4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; CHECK-VF8-NOT: call void @amd_vrs8_sincosf(<8 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; CHECK-VF16-NOT: call void @amd_vrs16_sincosf(<16 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]]) +; CHECK: ret void +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %gepa = getelementptr float, ptr %a, i64 %indvars.iv + %num = load float, ptr %gepa, align 8 + %gepb = getelementptr float, ptr %b, i64 %indvars.iv + %gepc = getelementptr float, ptr %c, i64 %indvars.iv + call void @sincosf(float %num, ptr %gepb, ptr %gepc) + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + attributes #0 = { nounwind readnone } + +declare double @exp10(double) #0 +declare float @exp10f(float) #0 +declare double @llvm.exp10.f64(double) #0 +declare float @llvm.exp10.f32(float) #0 +declare void @sincos(double, ptr, ptr) +declare void @sincosf(float, ptr, ptr) diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll index 
4e4b81e89a32700..76cccbd6f39cc39 100644 --- a/llvm/test/Transforms/Util/add-TLI-mappings.ll +++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll @@ -14,10 +14,15 @@ ; SVML-SAME: ptr @__svml_log10f4, ; SVML-SAME: ptr @__svml_log10f8, ; SVML-SAME: ptr @__svml_log10f16 -; AMDLIBM-SAME: [6 x ptr] [ +; AMDLIBM-SAME: [11 x ptr] [ ; AMDLIBM-SAME: ptr @amd_vrd2_sin, ; AMDLIBM-SAME: ptr @amd_vrd4_sin, ; AMDLIBM-SAME: ptr @amd_vrd8_sin, +; AMDLIBM-SAME: ptr @amd_vrd4_sincos, +; AMDLIBM-SAME: ptr @amd_vrd8_sincos, +; AMDLIBM-SAME: ptr @amd_vrs4_sincosf, +; AMDLIBM-SAME: ptr @amd_vrs8_sincosf, +; AMDLIBM-SAME: ptr @amd_vrs16_sincosf ; AMDLIBM-SAME: ptr @amd_vrs4_log10f, ; AMDLIBM-SAME: ptr @amd_vrs8_log10f, ; AMDLIBM-SAME: ptr @amd_vrs16_log10f @@ -106,6 +111,7 @@ define void @sincos_f64(double %in, ptr %sin, ptr %cos) { ; COMMON-LABEL: @sincos_f64( ; SLEEFGNUABI: call void @sincos(double %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOS:[0-9]+]] ; ARMPL: call void @sincos(double %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOS:[0-9]+]] +; AMDLIBM: call void @sincos(double %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOS:[0-9]+]] call void @sincos(double %in, ptr %sin, ptr %cos) ret void } @@ -116,6 +122,7 @@ define void @sincos_f32(float %in, ptr %sin, ptr %cos) { ; COMMON-LABEL: @sincos_f32( ; SLEEFGNUABI: call void @sincosf(float %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOSF:[0-9]+]] ; ARMPL: call void @sincosf(float %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOSF:[0-9]+]] +; AMDLIBM: call void @sincosf(float %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOSF:[0-9]+]] call void @sincosf(float %in, ptr %sin, ptr %cos) ret void } @@ -145,7 +152,7 @@ declare void @sincospif(float, ptr, ptr) #0 define float @call_llvm.log10.f32(float %in) { ; COMMON-LABEL: @call_llvm.log10.f32( ; SVML: call float @llvm.log10.f32(float %{{.*}}) -; AMDLIBM: call float @llvm.log10.f32(float %{{.*}}) +; AMDLIBM: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; LIBMVEC-X86: call float @llvm.log10.f32(float 
%{{.*}}) ; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] ; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]] @@ -171,6 +178,11 @@ declare float @llvm.log10.f32(float) #0 ; AMDLIBM: declare <2 x double> @amd_vrd2_sin(<2 x double>) ; AMDLIBM: declare <4 x double> @amd_vrd4_sin(<4 x double>) ; AMDLIBM: declare <8 x double> @amd_vrd8_sin(<8 x double>) +; AMDLIBM: declare void @amd_vrd4_sincos(<4 x double>, ptr, ptr) +; AMDLIBM: declare void @amd_vrd8_sincos(<8 x double>, ptr, ptr) +; AMDLIBM: declare void @amd_vrs4_sincosf(<4 x float>, ptr, ptr) +; AMDLIBM: declare void @amd_vrs8_sincosf(<8 x float>, ptr, ptr) +; AMDLIBM: declare void @amd_vrs16_sincosf(<16 x float>, ptr, ptr) ; AMDLIBM: declare <4 x float> @amd_vrs4_log10f(<4 x float>) ; AMDLIBM: declare <8 x float> @amd_vrs8_log10f(<8 x float>) ; AMDLIBM: declare <16 x float> @amd_vrs16_log10f(<16 x float>) @@ -228,6 +240,17 @@ attributes #0 = { nounwind readnone } ; AMDLIBM-SAME: "_ZGV_LLVM_N2v_sin(amd_vrd2_sin), ; AMDLIBM-SAME: _ZGV_LLVM_N4v_sin(amd_vrd4_sin), ; AMDLIBM-SAME: _ZGV_LLVM_N8v_sin(amd_vrd8_sin)" } +; AMDLIBM: attributes #[[SINCOS]] = { "vector-function-abi-variant"= +; AMDLIBM-SAME: "_ZGV_LLVM_N4vl8l8_sincos(amd_vrd4_sincos), +; AMDLIBM-SAME: _ZGV_LLVM_N8vl8l8_sincos(amd_vrd8_sincos)" } +; AMDLIBM: attributes #[[SINCOSF]] = { "vector-function-abi-variant"= +; AMDLIBM-SAME: "_ZGV_LLVM_N4vl4l4_sincosf(amd_vrs4_sincosf), +; AMDLIBM-SAME: _ZGV_LLVM_N8vl4l4_sincosf(amd_vrs8_sincosf), +; AMDLIBM-SAME: _ZGV_LLVM_N16vl4l4_sincosf(amd_vrs16_sincosf)" } +; AMDLIBM: attributes #[[LOG10]] = { "vector-function-abi-variant"= +; AMDLIBM-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(amd_vrs4_log10f), +; AMDLIBM-SAME: _ZGV_LLVM_N8v_llvm.log10.f32(amd_vrs8_log10f), +; AMDLIBM-SAME: _ZGV_LLVM_N16v_llvm.log10.f32(amd_vrs16_log10f)" } ; MASSV: attributes #[[SIN]] = { "vector-function-abi-variant"= ; MASSV-SAME: "_ZGV_LLVM_N2v_sin(__sind2)" } From a393c92f5df141d464bb17cc82f2344866cea1de Mon 
Sep 17 00:00:00 2001 From: Edd Dawson Date: Tue, 29 Oct 2024 10:16:58 +0000 Subject: [PATCH 264/425] [PS5][Driver] Update default linking options when `-r` omitted. (#113595) Until now, these options have been hardcoded as downstream patches in lld. Add them to the driver so that the private patches can be removed. PS5 only. On PS4, the proprietary linker will continue to perform the equivalent behaviours itself. SIE tracker: TOOLCHAIN-16704 --- clang/lib/Driver/ToolChains/PS4CPU.cpp | 14 +++++++++ clang/test/Driver/ps5-linker.c | 41 ++++++++++++++++---------- 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index a50333223ff5c41..719bba41436a57b 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -250,6 +250,20 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-pie"); if (!Relocatable) { + CmdArgs.push_back("--eh-frame-hdr"); + CmdArgs.push_back("--hash-style=sysv"); + + // Add a build-id by default to allow the PlayStation symbol server to + // index the symbols. `uuid` is the cheapest fool-proof method. + // (The non-determinism and alternative methods are noted in the downstream + // PlayStation docs). + CmdArgs.push_back("--build-id=uuid"); + + // All references are expected to be resolved at static link time for both + // executables and dynamic libraries. This has been the default linking + // behaviour for numerous PlayStation generations. + CmdArgs.push_back("--unresolved-symbols=report-all"); + // Lazy binding of PLTs is not supported on PlayStation. They are placed in // the RelRo segment. 
CmdArgs.push_back("-z"); diff --git a/clang/test/Driver/ps5-linker.c b/clang/test/Driver/ps5-linker.c index d18309a650726d8..2080f4dc91a7fb9 100644 --- a/clang/test/Driver/ps5-linker.c +++ b/clang/test/Driver/ps5-linker.c @@ -14,21 +14,32 @@ // CHECK-NO-PIE-NOT: "-pie" // CHECK-SHARED: "--shared" -// Test the driver passes PlayStation-specific -z options to the linker. - -// RUN: %clang --target=x86_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-Z %s - -// CHECK-Z: {{ld(\.exe)?}}" -// CHECK-Z-SAME: "-z" "now" -// CHECK-Z-SAME: "-z" "start-stop-visibility=hidden" -// CHECK-Z-SAME: "-z" "dead-reloc-in-nonalloc=.debug_*=0xffffffffffffffff" -// CHECK-Z-SAME: "-z" "dead-reloc-in-nonalloc=.debug_ranges=0xfffffffffffffffe" -// CHECK-Z-SAME: "-z" "dead-reloc-in-nonalloc=.debug_loc=0xfffffffffffffffe" - -// RUN: %clang --target=x86_64-sie-ps5 -r %s -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-Z %s - -// CHECK-NO-Z: {{ld(\.exe)?}}" -// CHECK-NO-Z-NOT: "-z" +// Test the driver passes PlayStation-specific options to the linker that are +// appropriate for the type of output. Many options don't apply for relocatable +// output (-r). 
+ +// RUN: %clang --target=x86_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-EXE %s +// RUN: %clang --target=x86_64-sie-ps5 %s -shared -### 2>&1 | FileCheck --check-prefixes=CHECK-EXE %s +// RUN: %clang --target=x86_64-sie-ps5 %s -static -### 2>&1 | FileCheck --check-prefixes=CHECK-EXE %s +// RUN: %clang --target=x86_64-sie-ps5 %s -r -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-EXE %s + +// CHECK-EXE: {{ld(\.exe)?}}" +// CHECK-EXE-SAME: "--eh-frame-hdr" +// CHECK-EXE-SAME: "--hash-style=sysv" +// CHECK-EXE-SAME: "--build-id=uuid" +// CHECK-EXE-SAME: "--unresolved-symbols=report-all" +// CHECK-EXE-SAME: "-z" "now" +// CHECK-EXE-SAME: "-z" "start-stop-visibility=hidden" +// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_*=0xffffffffffffffff" +// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_ranges=0xfffffffffffffffe" +// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_loc=0xfffffffffffffffe" + +// CHECK-NO-EXE: {{ld(\.exe)?}}" +// CHECK-NO-EXE-NOT: "--eh-frame-hdr" +// CHECK-NO-EXE-NOT: "--hash-style +// CHECK-NO-EXE-NOT: "--build-id +// CHECK-NO-EXE-NOT: "--unresolved-symbols +// CHECK-NO-EXE-NOT: "-z" // Test that -static is forwarded to the linker From a8398bd81770a2801ec083fd2cd8a19140fe92a9 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 29 Oct 2024 10:33:17 +0000 Subject: [PATCH 265/425] [llvm][docs] Update list of llvm-lit options Fixes #62899 In this commit I have updated the list of options to include any missing options and reordered some of them to match the order in lit's --help. Where there was a larger description in this document I've used that instead of the --help description. This *does not* include --use-unique-output-file-name as this was only added recently and we are still debating whether it will be kept. 
--- llvm/docs/CommandGuide/lit.rst | 162 +++++++++++++++++++++++---------- 1 file changed, 113 insertions(+), 49 deletions(-) diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst index bd1cfbbade511f0..af8a1a08be535dd 100644 --- a/llvm/docs/CommandGuide/lit.rst +++ b/llvm/docs/CommandGuide/lit.rst @@ -57,7 +57,11 @@ GENERAL OPTIONS .. option:: -h, --help - Show the :program:`lit` help message. + Show the :program:`lit` help message and exit. + +.. option:: --version + + Show :program:`lit`'s version number and exit. .. option:: -j N, --workers=N @@ -108,23 +112,51 @@ OUTPUT OPTIONS Enable -v, but for all tests not just failed tests. +.. option:: -o PATH, --output PATH + + Write test results to the provided path. + .. option:: --no-progress-bar Do not use curses based progress bar. +.. option:: --show-excluded + + Show excluded tests. + +.. option:: --show-skipped + + Show skipped tests. + .. option:: --show-unsupported - Show the names of unsupported tests. + Show unsupported tests. + +.. option:: --show-pass + + Show passed tests. + +.. option:: --show-flakypass + + Show passed with retry tests. .. option:: --show-xfail - Show the names of tests that were expected to fail. + Show expectedly failed tests. .. _execution-options: EXECUTION OPTIONS ----------------- +.. option:: --gtest-sharding + + Enable sharding for GoogleTest format. + +.. option:: --no-gtest-sharding + + Disable sharding for GoogleTest format. + .. option:: --path=PATH Specify an additional ``PATH`` to use when searching for executables in tests. @@ -139,11 +171,6 @@ EXECUTION OPTIONS "``valgrind``" feature that can be used to conditionally disable (or expect failure in) certain tests. -.. option:: --vg-arg=ARG - - When :option:`--vg` is used, specify an additional argument to pass to - :program:`valgrind` itself. - .. option:: --vg-leak When :option:`--vg` is used, enable memory leak checks. 
When this option is @@ -151,9 +178,59 @@ EXECUTION OPTIONS feature that can be used to conditionally disable (or expect failure in) certain tests. +.. option:: --vg-arg=ARG + + When :option:`--vg` is used, specify an additional argument to pass to + :program:`valgrind` itself. + +.. option:: --no-execute + + Don't execute any tests (assume that they pass). + +.. option:: --xunit-xml-output XUNIT_XML_OUTPUT + + Write XUnit-compatible XML test reports to the specified file. + +.. option:: --resultdb-output RESULTDB_OUTPUT + + Write LuCI ResultDB compatible JSON to the specified file. + +.. option:: --time-trace-output TIME_TRACE_OUTPUT + + Write Chrome tracing compatible JSON to the specified file. + +.. option:: --timeout MAXINDIVIDUALTESTTIME + + Maximum time to spend running a single test (in seconds). 0 means no time + limit. [Default: 0] + +.. option:: --timeout=N + + Spend at most ``N`` seconds (approximately) running each individual test. + ``0`` means no time limit, and ``0`` is the default. Note that this is not an + alias for :option:`--max-time`; the two are different kinds of maximums. + +.. option:: --max-failures MAX_FAILURES + + Stop execution after the given number of failures. + +.. option:: --allow-empty-runs + + Do not fail the run if all tests are filtered out. + +.. option:: --per-test-coverage + + Emit the necessary test coverage data, divided per test case (involves + setting a unique value to LLVM_PROFILE_FILE for each RUN). The coverage + data files will be emitted in the directory specified by ``config.test_exec_root``. + +.. option:: --ignore-fail + + Exit with status zero even if some tests fail. + .. option:: --skip-test-time-recording - Disable tracking the wall time individual tests take to execute. + Do not track elapsed wall time for each test. .. option:: --time-tests @@ -161,10 +238,6 @@ EXECUTION OPTIONS in the summary output. This is useful for determining which tests in a test suite take the most time to execute. -.. 
option:: --ignore-fail - - Exit with status zero even if some tests fail. - .. _selection-options: SELECTION OPTIONS @@ -178,23 +251,6 @@ The timing data is stored in the `test_exec_root` in a file named `.lit_test_times.txt`. If this file does not exist, then `lit` checks the `test_source_root` for the file to optionally accelerate clean builds. -.. option:: --shuffle - - Run the tests in a random order, not failing/slowest first. Deprecated, - use :option:`--order` instead. - -.. option:: --per-test-coverage - - Emit the necessary test coverage data, divided per test case (involves - setting a unique value to LLVM_PROFILE_FILE for each RUN). The coverage - data files will be emitted in the directory specified by `config.test_exec_root`. - -.. option:: --max-failures N - - Stop execution after the given number ``N`` of failures. - An integer argument should be passed on the command line - prior to execution. - .. option:: --max-tests=N Run at most ``N`` tests and then terminate. @@ -205,16 +261,6 @@ The timing data is stored in the `test_exec_root` in a file named Note that this is not an alias for :option:`--timeout`; the two are different kinds of maximums. -.. option:: --num-shards=M - - Divide the set of selected tests into ``M`` equal-sized subsets or - "shards", and run only one of them. Must be used with the - ``--run-shard=N`` option, which selects the shard to run. The environment - variable ``LIT_NUM_SHARDS`` can also be used in place of this - option. These two options provide a coarse mechanism for partitioning large - testsuites, for parallel execution on separate machines (say in a large - testing farm). - .. option:: --order={lexical,random,smart} Define the order in which tests are run. The supported values are: @@ -228,18 +274,14 @@ The timing data is stored in the `test_exec_root` in a file named tests, all in descending execution time order. This is the default as it optimizes concurrency. -.. option:: --run-shard=N +.. 
option:: --shuffle - Select which shard to run, assuming the ``--num-shards=M`` option was - provided. The two options must be used together, and the value of ``N`` - must be in the range ``1..M``. The environment variable - ``LIT_RUN_SHARD`` can also be used in place of this option. + Run the tests in a random order, not failing/slowest first. Deprecated, + use :option:`--order` instead. -.. option:: --timeout=N +.. option:: -i, --incremental - Spend at most ``N`` seconds (approximately) running each individual test. - ``0`` means no time limit, and ``0`` is the default. Note that this is not an - alias for :option:`--max-time`; the two are different kinds of maximums. + Run failed tests first (DEPRECATED: use ``--order=smart``). .. option:: --filter=REGEXP @@ -297,6 +339,23 @@ The timing data is stored in the `test_exec_root` in a file named primary purpose is to suppress an ``XPASS`` result without modifying a test case that uses the ``XFAIL`` directive. +.. option:: --num-shards=M + + Divide the set of selected tests into ``M`` equal-sized subsets or + "shards", and run only one of them. Must be used with the + ``--run-shard=N`` option, which selects the shard to run. The environment + variable ``LIT_NUM_SHARDS`` can also be used in place of this + option. These two options provide a coarse mechanism for partitioning large + testsuites, for parallel execution on separate machines (say in a large + testing farm). + +.. option:: --run-shard=N + + Select which shard to run, assuming the ``--num-shards=M`` option was + provided. The two options must be used together, and the value of ``N`` + must be in the range ``1..M``. The environment variable + ``LIT_RUN_SHARD`` can also be used in place of this option. + ADDITIONAL OPTIONS ------------------ @@ -313,6 +372,11 @@ ADDITIONAL OPTIONS List all of the discovered tests and exit. +.. option:: --show-used-features + + Show all features used in the test suite (in ``XFAIL``, ``UNSUPPORTED`` and + ``REQUIRES``) and exit. 
+ EXIT STATUS ----------- From 7395ef5419a6438f0c48685bf00b7f151178743d Mon Sep 17 00:00:00 2001 From: wldfngrs Date: Tue, 29 Oct 2024 11:39:57 +0100 Subject: [PATCH 266/425] [libc][math][c23] Add cospif16 function (#113001) Implementation of `cos` for half precision floating point inputs scaled by pi (i.e., `cospi`), correctly rounded for all rounding modes. --------- Co-authored-by: OverMighty --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 +- libc/newhdrgen/yaml/math.yaml | 7 ++ libc/src/math/CMakeLists.txt | 1 + libc/src/math/cospif16.h | 21 ++++++ libc/src/math/generic/CMakeLists.txt | 40 +++++++++-- libc/src/math/generic/cospif16.cpp | 81 ++++++++++++++++++++++ libc/src/math/generic/sincosf16_utils.h | 77 ++++++++++++++++++++ libc/src/math/generic/sinpif16.cpp | 78 +++------------------ libc/test/src/math/CMakeLists.txt | 11 +++ libc/test/src/math/cospif16_test.cpp | 40 +++++++++++ libc/test/src/math/smoke/CMakeLists.txt | 11 +++ libc/test/src/math/smoke/cospif16_test.cpp | 44 ++++++++++++ libc/utils/MPFRWrapper/MPFRUtils.cpp | 20 ++---- 15 files changed, 347 insertions(+), 88 deletions(-) create mode 100644 libc/src/math/cospif16.h create mode 100644 libc/src/math/generic/cospif16.cpp create mode 100644 libc/src/math/generic/sincosf16_utils.h create mode 100644 libc/test/src/math/cospif16_test.cpp create mode 100644 libc/test/src/math/smoke/cospif16_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 71c6e874429fedf..b3f94a581c8ad90 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -607,6 +607,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.canonicalizef16 libc.src.math.ceilf16 libc.src.math.copysignf16 + libc.src.math.cospif16 # TODO: aarch64 bug # Please see https://github.com/llvm/llvm-project/pull/100632#issuecomment-2258772681 # libc.src.math.expf16 diff --git 
a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 9bc63edf06f28c9..a2fb97d04584d5b 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -611,6 +611,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.ceilf16 libc.src.math.copysignf16 libc.src.math.coshf16 + libc.src.math.cospif16 libc.src.math.exp10f16 libc.src.math.exp10m1f16 libc.src.math.exp2f16 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index ce4df92393ce7f4..a50e054622e1a45 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -280,7 +280,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | cosh | |check| | | | |check| | | 7.12.5.4 | F.10.2.4 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| cospi | |check| | | | | | 7.12.4.12 | F.10.1.12 | +| cospi | |check| | | | |check| | | 7.12.4.12 | F.10.1.12 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | dsqrt | N/A | N/A | |check| | N/A | |check|\* | 7.12.14.6 | F.10.11 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml index fe07803cff06f84..3cc4b599c777bff 100644 --- a/libc/newhdrgen/yaml/math.yaml +++ b/libc/newhdrgen/yaml/math.yaml @@ -206,6 +206,13 @@ functions: return_type: float arguments: - type: float + - name: cospif16 + standards: + - stdc + return_type: _Float16 + arguments: + - type: _Float16 + guard: 
LIBC_TYPES_HAS_FLOAT16 - name: coshf16 standards: - stdc diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index cb4817348cbba5e..80c1867d2116f62 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -95,6 +95,7 @@ add_math_entrypoint_object(coshf) add_math_entrypoint_object(coshf16) add_math_entrypoint_object(cospif) +add_math_entrypoint_object(cospif16) add_math_entrypoint_object(daddl) add_math_entrypoint_object(daddf128) diff --git a/libc/src/math/cospif16.h b/libc/src/math/cospif16.h new file mode 100644 index 000000000000000..ef9625dfed45f6f --- /dev/null +++ b/libc/src/math/cospif16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for cospif16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_COSPIF16_H +#define LLVM_LIBC_SRC_MATH_COSPIF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 cospif16(float16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_COSPIF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 35e7347b91362e4..ca27759d3212f27 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -351,6 +351,17 @@ add_header_library( libc.src.__support.common ) +add_header_library( + sincosf16_utils + HDRS + sincosf16_utils.h + DEPENDS + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.common +) + add_header_library( sincos_eval HDRS @@ -422,6 +433,25 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( +
cospif16 + SRCS + cospif16.cpp + HDRS + ../cospif16.h + DEPENDS + .sincosf16_utils + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.FPUtil.cast + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.macros.optimization + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( sin SRCS @@ -535,14 +565,14 @@ add_entrypoint_object( HDRS ../sinpif16.h DEPENDS - libc.src.__support.common + .sincosf16_utils + libc.hdr.errno_macros + libc.hdr.fenv_macros libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.macros.properties.types + libc.src.__support.macros.optimization COMPILE_OPTIONS -O3 ) diff --git a/libc/src/math/generic/cospif16.cpp b/libc/src/math/generic/cospif16.cpp new file mode 100644 index 000000000000000..dd8c7ab6afa3d6a --- /dev/null +++ b/libc/src/math/generic/cospif16.cpp @@ -0,0 +1,81 @@ +//===-- Half-precision cospif function ------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/cospif16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "sincosf16_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/cast.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(float16, cospif16, (float16 x)) { + using FPBits = typename fputil::FPBits; + FPBits xbits(x); + + uint16_t x_u = xbits.uintval(); + uint16_t x_abs = x_u & 0x7fff; + float xf = x; + + // Range reduction: + // For |x| > 1/32, we perform range reduction as follows: + // Find k and y such that: + // x = (k + y) * 1/32 + // k is an integer + // |y| < 0.5 + // + // This is done by performing: + // k = round(x * 32) + // y = x * 32 - k + // + // Once k and y are computed, we then deduce the answer by the cosine of sum + // formula: + // cos(x * pi) = cos((k + y) * pi/32) + // = cos(k * pi/32) * cos(y * pi/32) - + // sin(y * pi/32) * sin(k * pi/32) + + // For signed zeros + if (LIBC_UNLIKELY(x_abs == 0U)) + return fputil::cast(1.0f); + + // Numbers greater or equal to 2^10 are integers, or infinity, or NaN + if (LIBC_UNLIKELY(x_abs >= 0x6400)) { + if (LIBC_UNLIKELY(x_abs <= 0x67FF)) + return fputil::cast((x_abs & 0x1) ?
-1.0f : 1.0f); + + // Check for NaN or infinity values + if (LIBC_UNLIKELY(x_abs >= 0x7c00)) { + // If value is equal to infinity + if (x_abs == 0x7c00) { + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + } + + return x + FPBits::quiet_nan().get_val(); + } + + return fputil::cast(1.0f); + } + + float sin_k, cos_k, sin_y, cosm1_y; + sincospif16_eval(xf, sin_k, cos_k, sin_y, cosm1_y); + + if (LIBC_UNLIKELY(sin_y == 0 && cos_k == 0)) + return fputil::cast(0.0f); + + // Since, cosm1_y = cos_y - 1, therefore: + // cos(x * pi) = cos_k(cosm1_y) + cos_k - sin_k * sin_y + return fputil::cast(fputil::multiply_add( + cos_k, cosm1_y, fputil::multiply_add(-sin_k, sin_y, cos_k))); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/sincosf16_utils.h b/libc/src/math/generic/sincosf16_utils.h new file mode 100644 index 000000000000000..83511755a56c42d --- /dev/null +++ b/libc/src/math/generic/sincosf16_utils.h @@ -0,0 +1,77 @@ +//===-- Collection of utils for sinf16/cosf16 -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H +#define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H + +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +// Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
+// Table is generated with Sollya as follows: +// > display = hexadecimal; +// > for k from 0 to 63 do { round(sin(k * pi/32), SG, RN); }; +constexpr float SIN_K_PI_OVER_32[64] = { + 0x0.0p0, 0x1.917a6cp-4, 0x1.8f8b84p-3, 0x1.294062p-2, + 0x1.87de2ap-2, 0x1.e2b5d4p-2, 0x1.1c73b4p-1, 0x1.44cf32p-1, + 0x1.6a09e6p-1, 0x1.8bc806p-1, 0x1.a9b662p-1, 0x1.c38b3p-1, + 0x1.d906bcp-1, 0x1.e9f416p-1, 0x1.f6297cp-1, 0x1.fd88dap-1, + 0x1p0, 0x1.fd88dap-1, 0x1.f6297cp-1, 0x1.e9f416p-1, + 0x1.d906bcp-1, 0x1.c38b3p-1, 0x1.a9b662p-1, 0x1.8bc806p-1, + 0x1.6a09e6p-1, 0x1.44cf32p-1, 0x1.1c73b4p-1, 0x1.e2b5d4p-2, + 0x1.87de2ap-2, 0x1.294062p-2, 0x1.8f8b84p-3, 0x1.917a6cp-4, + 0x0.0p0, -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294062p-2, + -0x1.87de2ap-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1, -0x1.44cf32p-1, + -0x1.6a09e6p-1, -0x1.8bc806p-1, -0x1.a9b662p-1, -0x1.c38b3p-1, + -0x1.d906bcp-1, -0x1.e9f416p-1, -0x1.f6297ep-1, -0x1.fd88dap-1, + -0x1p0, -0x1.fd88dap-1, -0x1.f6297cp-1, -0x1.e9f416p-1, + -0x1.d906bcp-1, -0x1.c38b3p-1, -0x1.a9b662p-1, -0x1.8bc806p-1, + -0x1.6a09e6p-1, -0x1.44cf32p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2, + -0x1.87de2ap-2, -0x1.294062p-2, -0x1.8f8b84p-3, -0x1.917a6cp-4}; + +LIBC_INLINE int32_t range_reduction_sincospif16(float x, float &y) { + float kf = fputil::nearest_integer(x * 32); + y = fputil::multiply_add(x, 32.0, -kf); + + return static_cast(kf); +} + +LIBC_INLINE void sincospif16_eval(float xf, float &sin_k, float &cos_k, + float &sin_y, float &cosm1_y) { + float y; + int32_t k = range_reduction_sincospif16(xf, y); + + sin_k = SIN_K_PI_OVER_32[k & 63]; + cos_k = SIN_K_PI_OVER_32[(k + 16) & 63]; + + // Recall, after range reduction, -0.5 <= y <= 0.5. For very small values of + // y, calculating sin(y * pi/32) can be inaccurate. Generating a polynomial for + // sin(y * pi/32)/y instead significantly reduces the relative errors.
+ float ysq = y * y; + + // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya + // with: + // > Q = fpminimax(sin(y * pi/32)/y, [|0, 2, 4, 6|], [|SG...|], [0, 0.5]); + sin_y = y * fputil::polyeval(ysq, 0x1.921fb6p-4f, -0x1.4aeabcp-13f, + 0x1.a03354p-21f, -0x1.ad02d2p-20f); + + // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya + // with: + // > P = fpminimax(cos(y * pi/32), [|0, 2, 4, 6|],[|1, SG...|], [0, 0.5]); + cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3ccp-8f, 0x1.03a61ap-18f, + 0x1.a6f7a2p-29f); +} + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp index 17cca583e0c0ec7..51ea595653b4da7 100644 --- a/libc/src/math/generic/sinpif16.cpp +++ b/libc/src/math/generic/sinpif16.cpp @@ -7,52 +7,23 @@ //===----------------------------------------------------------------------===// #include "src/math/sinpif16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "sincosf16_utils.h" #include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" #include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { -// Lookup table for sin(k * pi / 32) with k = 0, ..., 63. 
-// Table is generated with Sollya as follows: -// > display = hexadecimmal; -// > for k from 0 to 63 do { round(sin(k * pi/32), SG, RN); }; -static constexpr float SIN_K_PI_OVER_32[64] = { - 0x0.0p0, 0x1.917a6cp-4, 0x1.8f8b84p-3, 0x1.294062p-2, - 0x1.87de2ap-2, 0x1.e2b5d4p-2, 0x1.1c73b4p-1, 0x1.44cf32p-1, - 0x1.6a09e6p-1, 0x1.8bc806p-1, 0x1.a9b662p-1, 0x1.c38b3p-1, - 0x1.d906bcp-1, 0x1.e9f416p-1, 0x1.f6297cp-1, 0x1.fd88dap-1, - 0x1p0, 0x1.fd88dap-1, 0x1.f6297cp-1, 0x1.e9f416p-1, - 0x1.d906bcp-1, 0x1.c38b3p-1, 0x1.a9b662p-1, 0x1.8bc806p-1, - 0x1.6a09e6p-1, 0x1.44cf32p-1, 0x1.1c73b4p-1, 0x1.e2b5d4p-2, - 0x1.87de2ap-2, 0x1.294062p-2, 0x1.8f8b84p-3, 0x1.917a6cp-4, - 0x0.0p0, -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294062p-2, - -0x1.87de2ap-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1, -0x1.44cf32p-1, - -0x1.6a09e6p-1, -0x1.8bc806p-1, -0x1.a9b662p-1, -0x1.c38b3p-1, - -0x1.d906bcp-1, -0x1.e9f416p-1, -0x1.f6297ep-1, -0x1.fd88dap-1, - -0x1p0, -0x1.fd88dap-1, -0x1.f6297cp-1, -0x1.e9f416p-1, - -0x1.d906bcp-1, -0x1.c38b3p-1, -0x1.a9b662p-1, -0x1.8bc806p-1, - -0x1.6a09e6p-1, -0x1.44cf32p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2, - -0x1.87de2ap-2, -0x1.294062p-2, -0x1.8f8b84p-3, -0x1.917a6cp-4}; - -static LIBC_INLINE int32_t range_reduction(float x, float &y) { - float kf = fputil::nearest_integer(x * 32); - y = fputil::multiply_add(x, 32.0, -kf); - - return static_cast(kf); -} - LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) { using FPBits = typename fputil::FPBits; FPBits xbits(x); uint16_t x_u = xbits.uintval(); uint16_t x_abs = x_u & 0x7fff; + float xf = x; // Range reduction: // For |x| > 1/32, we perform range reduction as follows: @@ -68,12 +39,8 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) { // Once k and y are computed, we then deduce the answer by the sine of sum // formula: // sin(x * pi) = sin((k + y) * pi/32) - // = sin(k * pi/32) * cos(y * pi/32) + sin (y * pi/32) * cos (k * - // pi/32) - // The values of sin(k * pi/32) and cos (k * pi/32) for k = 0...63 are - // 
precomputed and stored using a vector of 64 single precision floats. sin(y - // * pi/32) and cos(y * pi/32) are computed using degree-9 chebyshev - // polynomials generated by Sollya. + // = sin(k * pi/32) * cos(y * pi/32) + + // sin(y * pi/32) * cos(k * pi/32) // For signed zeros if (LIBC_UNLIKELY(x_abs == 0U)) @@ -94,36 +61,8 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) { return FPBits::zero(xbits.sign()).get_val(); } - float f32 = x; - float y; - int32_t k = range_reduction(f32, y); - - float sin_k = SIN_K_PI_OVER_32[k & 63]; - float cos_k = SIN_K_PI_OVER_32[(k + 16) & 63]; - - // Recall; - // sin(x * pi/32) = sin((k + y) * pi/32) - // = sin(y * pi/32) * cos(k * pi/32) + cos(y * pi/32) * sin(k * - // pi/32) Recall, after range reduction, -0.5 <= y <= 0.5. For very small - // values of y, calculating sin(y * p/32) can be inaccurate. Generating a - // polynomial for sin(y * p/32)/y instead significantly reduces the relative - // errors. - float ysq = y * y; - - // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya - // with: > Q = fpminimax(sin(y*pi/32)/y, [|0, 2, 4, 6|], [|SG...|], [0, 0.5]); - float sin_y = y * fputil::polyeval(ysq, 0x1.921fb6p-4f, -0x1.4aeabcp-13f, - 0x1.a03354p-21f, -0x1.ad02d2p-20f); - - // Note that cosm1_y = cos(y*pi/32) - 1 = cos_y - 1 - // Derivation: - // sin(x * pi) = sin((k + y) * pi/32) - // = sin_y * cos_k + cos_y * sin_k - // = cos_k * sin_y + sin_k * (1 + cos_y - 1) - // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya with: - // > P = fpminimax(cos(y*pi/32), [|0, 2, 4, 6|],[|1, SG...|], [0, 0.5]); - float cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3ccp-8f, 0x1.03a61ap-18f, - 0x1.a6f7a2p-29f); + float sin_k, cos_k, sin_y, cosm1_y; + sincospif16_eval(xf, sin_k, cos_k, sin_y, cosm1_y); if (LIBC_UNLIKELY(sin_y == 0 && sin_k == 0)) return FPBits::zero(xbits.sign()).get_val(); @@ -133,4 +72,5 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) { return 
fputil::cast(fputil::multiply_add( sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k))); } + } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 262c717dd27d558..b46ef4028915ba4 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -45,6 +45,17 @@ add_fp_unittest( ) +add_fp_unittest( + cospif16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + cospif16_test.cpp + DEPENDS + libc.src.math.cospif16 +) + add_fp_unittest( daddl_test NEED_MPFR diff --git a/libc/test/src/math/cospif16_test.cpp b/libc/test/src/math/cospif16_test.cpp new file mode 100644 index 000000000000000..6a32498b0570ac6 --- /dev/null +++ b/libc/test/src/math/cospif16_test.cpp @@ -0,0 +1,40 @@ +//===-- Exhaustive test for cospif16 --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/cospif16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcCospif16Test = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +// Range: [0, Inf] +static constexpr uint16_t POS_START = 0x0000U; +static constexpr uint16_t POS_STOP = 0x7c00U; + +// Range: [-Inf, 0] +static constexpr uint16_t NEG_START = 0x8000U; +static constexpr uint16_t NEG_STOP = 0xfc00U; + +TEST_F(LlvmLibcCospif16Test, PositiveRange) { + for (uint16_t v = POS_START; v <= POS_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cospi, x, + LIBC_NAMESPACE::cospif16(x), 0.5); + } +} + +TEST_F(LlvmLibcCospif16Test, NegativeRange) { + for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cospi, x, + LIBC_NAMESPACE::cospif16(x), 0.5); + } +} diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index b2d1871541efc99..269e92c59006281 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -25,6 +25,17 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + cospif16_test + SUITE + libc-math-smoke-tests + SRCS + cospif16_test.cpp + DEPENDS + libc.src.errno.errno + libc.src.math.cospif16 +) + add_fp_unittest( sinf_test SUITE diff --git a/libc/test/src/math/smoke/cospif16_test.cpp b/libc/test/src/math/smoke/cospif16_test.cpp new file mode 100644 index 000000000000000..f6d7483393191fd --- /dev/null +++ b/libc/test/src/math/smoke/cospif16_test.cpp @@ -0,0 +1,44 @@ +//===-- Unittests for cospif16 --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/errno/libc_errno.h" +#include "src/math/cospif16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcCospif16Test = LIBC_NAMESPACE::testing::FPTest; + +TEST_F(LlvmLibcCospif16Test, SpecialNumbers) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cospif16(aNaN)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif16(zero)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif16(neg_zero)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cospif16(inf)); + EXPECT_MATH_ERRNO(EDOM); + + EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cospif16(neg_inf)); + EXPECT_MATH_ERRNO(EDOM); +} + +TEST_F(LlvmLibcCospif16Test, Integers) { + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif16(-0x420)); + EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif16(-0x1.4p+14)); + EXPECT_FP_EQ(-1.0f, LIBC_NAMESPACE::cospif16(0x421)); + EXPECT_FP_EQ(-1.0f, LIBC_NAMESPACE::cospif16(0x333)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::cospif16(-0x1.28p4)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::cospif16(-0x1.ffcp9)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::cospif16(0x1.01p7)); + EXPECT_FP_EQ(zero, LIBC_NAMESPACE::cospif16(0x1.f6cp9)); +} diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index bd4fbe294a622d3..60e4abadb5e3c8a 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -255,19 +255,13 @@ class MPFRNumber { mpfr_cospi(result.value, value, mpfr_rounding); return result; #else - MPFRNumber value_frac(*this); - mpfr_frac(value_frac.value, value, MPFR_RNDN); - - if (mpfr_cmp_si(value_frac.value, 0.0) == 0) { - mpz_t integer_part; - mpz_init(integer_part); - mpfr_get_z(integer_part, 
value, MPFR_RNDN); - - if (mpz_tstbit(integer_part, 0)) { - mpfr_set_si(result.value, -1.0, MPFR_RNDN); // odd - } else { - mpfr_set_si(result.value, 1.0, MPFR_RNDN); // even - } + if (mpfr_integer_p(value)) { + mpz_t integer; + mpz_init(integer); + mpfr_get_z(integer, value, mpfr_rounding); + + int d = mpz_tstbit(integer, 0); + mpfr_set_si(result.value, d ? -1 : 1, mpfr_rounding); return result; } From c3260c65e86ac363aa3a39f084db66a8a1d1af7d Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 29 Oct 2024 10:52:20 +0000 Subject: [PATCH 267/425] [IR] Add `llvm.sincos` intrinsic (#109825) This adds the `llvm.sincos` intrinsic, legalization, and lowering. The `llvm.sincos` intrinsic takes a floating-point value and returns both the sine and cosine (as a struct). ``` declare { float, float } @llvm.sincos.f32(float %Val) declare { double, double } @llvm.sincos.f64(double %Val) declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val) declare { fp128, fp128 } @llvm.sincos.f128(fp128 %Val) declare { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %Val) declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val) ``` The lowering is built on top of the existing FSINCOS ISD node, with additional type legalization to allow for f16, f128, and vector values. 
--- llvm/docs/GlobalISel/GenericOpcode.rst | 4 +- llvm/docs/LangRef.rst | 48 ++ llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 + .../CodeGen/GlobalISel/MachineIRBuilder.h | 7 + llvm/include/llvm/IR/Intrinsics.td | 2 + llvm/include/llvm/Support/TargetOpcodes.def | 3 + llvm/include/llvm/Target/GenericOpcodes.td | 7 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 7 + llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 21 + .../SelectionDAG/LegalizeFloatTypes.cpp | 83 +++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 11 +- .../SelectionDAG/LegalizeVectorOps.cpp | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 69 ++- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 16 +- llvm/lib/CodeGen/TargetLoweringBase.cpp | 5 +- .../lib/Target/ARM/ARMTargetTransformInfo.cpp | 1 + .../AArch64/GlobalISel/irtranslator-sincos.ll | 120 ++++ .../GlobalISel/legalizer-info-validation.mir | 3 + llvm/test/CodeGen/AArch64/llvm.frexp.ll | 101 ++++ llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll | 16 + llvm/test/CodeGen/AArch64/llvm.sincos.ll | 553 ++++++++++++++++++ llvm/test/CodeGen/ARM/llvm.sincos.ll | 223 +++++++ 23 files changed, 1300 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll create mode 100644 llvm/test/CodeGen/AArch64/llvm.frexp.ll create mode 100644 llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll create mode 100644 llvm/test/CodeGen/AArch64/llvm.sincos.ll create mode 100644 llvm/test/CodeGen/ARM/llvm.sincos.ll diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 1c4e00b956bc4f8..8920530dc3f1a13 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -633,8 +633,8 @@ G_FCEIL, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT These correspond to the standard C functions of the same name. 
-G_FCOS, G_FSIN, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +G_FCOS, G_FSIN, G_FSINCOS, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These correspond to the standard C trigonometry functions of the same name. diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index f9ec33da1b651b3..177924dca4d1782 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15512,6 +15512,8 @@ Semantics: This function returns the first value raised to the second power with an unspecified sequence of rounding operations. +.. _t_llvm_sin: + '``llvm.sin.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -15549,6 +15551,8 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. +.. _t_llvm_cos: + '``llvm.cos.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -15882,6 +15886,50 @@ trapping or setting ``errno``. When specified with the fast-math-flag 'afn', the result may be approximated using a less accurate calculation. + +'``llvm.sincos.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.sincos`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare { float, float } @llvm.sincos.f32(float %Val) + declare { double, double } @llvm.sincos.f64(double %Val) + declare { x86_fp80, x86_fp80 } @llvm.sincos.f80(x86_fp80 %Val) + declare { fp128, fp128 } @llvm.sincos.f128(fp128 %Val) + declare { ppc_fp128, ppc_fp128 } @llvm.sincos.ppcf128(ppc_fp128 %Val) + declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float> %Val) + +Overview: +""""""""" + +The '``llvm.sincos.*``' intrinsics returns the sine and cosine of the operand. 
+ +Arguments: +"""""""""" + +The argument is a :ref:`floating-point <t_floating>` value or +:ref:`vector <t_vector>` of floating-point values. Returns two values matching +the argument type in a struct. + +Semantics: +"""""""""" + +This intrinsic is equivalent to calling both :ref:`llvm.sin <t_llvm_sin>` +and :ref:`llvm.cos <t_llvm_cos>` on the argument. + +The first result is the sine of the argument and the second result is the cosine +of the argument. + +When specified with the fast-math-flag 'afn', the result may be approximated +using a less accurate calculation. + '``llvm.pow.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index db3b5cddd7c1c3c..b0316e67654dbc5 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1986,6 +1986,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::cos: ISD = ISD::FCOS; break; + case Intrinsic::sincos: + ISD = ISD::FSINCOS; + break; case Intrinsic::tan: ISD = ISD::FTAN; break; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 9b993482c8cc072..ab3025e4923cd0c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -2009,6 +2009,13 @@ class MachineIRBuilder { return buildInstr(TargetOpcode::G_FFREXP, {Fract, Exp}, {Src}, Flags); } + /// Build and insert \p Sin, \p Cos = G_FSINCOS \p Src + MachineInstrBuilder + buildFSincos(const DstOp &Sin, const DstOp &Cos, const SrcOp &Src, + std::optional Flags = std::nullopt) { + return buildInstr(TargetOpcode::G_FSINCOS, {Sin, Cos}, {Src}, Flags); + } + /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1 MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1) { diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index
94e53f372127da1..e91758ed34eb389 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1050,6 +1050,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in { def int_nearbyint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_round : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; def int_roundeven : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>; + def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_anyfloat_ty]>; // Truncate a floating point number with a specific rounding mode def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ], diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def index 3556a253d875fe6..0c4c6ccd5c568e3 100644 --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -809,6 +809,9 @@ HANDLE_TARGET_OPCODE(G_FCOS) /// Floating point sine. HANDLE_TARGET_OPCODE(G_FSIN) +/// Floating point combined sine and cosine. +HANDLE_TARGET_OPCODE(G_FSINCOS) + /// Floating point tangent. HANDLE_TARGET_OPCODE(G_FTAN) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td index 8b8bc9a0e9cf565..62bb9789afe5d26 100644 --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1020,6 +1020,13 @@ def G_FSIN : GenericInstruction { let hasSideEffects = false; } +// Floating point combined sine and cosine. +def G_FSINCOS : GenericInstruction { + let OutOperandList = (outs type0:$dst1, type0:$dst2); + let InOperandList = (ins type0:$src1); + let hasSideEffects = false; +} + // Floating point tangent of a value. 
def G_FTAN : GenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 563a82644134528..5381dce58f9e65d 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2343,6 +2343,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MachineInstr::copyFlagsFromInstruction(CI)); return true; } + case Intrinsic::sincos: { + ArrayRef VRegs = getOrCreateVRegs(CI); + MIRBuilder.buildFSincos(VRegs[0], VRegs[1], + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + } case Intrinsic::fptosi_sat: MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI), getOrCreateVReg(*CI.getArgOperand(0))); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e0a03383358b76a..47a9ae12248ccba 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3714,6 +3714,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } + case ISD::FSINCOS: { + if (isSinCosLibcallAvailable(Node, TLI)) + break; + EVT VT = Node->getValueType(0); + SDValue Op = Node->getOperand(0); + SDNodeFlags Flags = Node->getFlags(); + Tmp1 = DAG.getNode(ISD::FSIN, dl, VT, Op, Flags); + Tmp2 = DAG.getNode(ISD::FCOS, dl, VT, Op, Flags); + Results.append({Tmp1, Tmp2}); + break; + } case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); @@ -5586,6 +5597,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(Tmp2.getValue(1)); break; } + case ISD::FSINCOS: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FSINCOS, dl, DAG.getVTList(NVT, NVT), Tmp1, + Node->getFlags()); + Tmp3 = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true); + for (unsigned ResNum = 0; ResNum < 
Node->getNumValues(); ResNum++) + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum), Tmp3)); + break; + } case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 73c258f0f6f18c2..fa2731ff7dbda75 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -129,6 +129,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLDEXP: case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break; case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break; + case ISD::FSINCOS: R = SoftenFloatRes_FSINCOS(N); break; case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::STRICT_FRINT: @@ -774,6 +775,45 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) { return ReturnVal; } +SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) { + assert(!N->isStrictFPOpcode() && "strictfp not implemented for fsincos"); + EVT VT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getFSINCOS(VT); + + if (!TLI.getLibcallName(LC)) + return SDValue(); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue StackSlotSin = DAG.CreateStackTemporary(NVT); + SDValue StackSlotCos = DAG.CreateStackTemporary(NVT); + + SDLoc DL(N); + + TargetLowering::MakeLibCallOptions CallOptions; + std::array Ops{GetSoftenedFloat(N->getOperand(0)), StackSlotSin, + StackSlotCos}; + std::array OpsVT{VT, StackSlotSin.getValueType(), + StackSlotCos.getValueType()}; + + // TODO: setTypeListBeforeSoften can't properly express multiple return types, + // but since both returns have the same type for sincos it should be okay. 
+ CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true); + + auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL, + /*Chain=*/SDValue()); + + auto CreateStackLoad = [&, Chain = Chain](SDValue StackSlot) { + int FrameIdx = cast(StackSlot)->getIndex(); + auto PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); + return DAG.getLoad(NVT, DL, Chain, StackSlot, PtrInfo); + }; + SetSoftenedFloat(SDValue(N, 0), CreateStackLoad(StackSlotSin)); + SetSoftenedFloat(SDValue(N, 1), CreateStackLoad(StackSlotCos)); + + return SDValue(); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::REM_F32, @@ -2704,6 +2744,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break; case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break; + case ISD::FSINCOS: + R = PromoteFloatRes_FSINCOS(N); + break; + case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; case ISD::STRICT_FP_ROUND: R = PromoteFloatRes_STRICT_FP_ROUND(N); @@ -2899,6 +2943,20 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) { return Res; } +SDValue DAGTypeLegalizer::PromoteFloatRes_FSINCOS(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op = GetPromotedFloat(N->getOperand(0)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, NVT}, Op); + + for (unsigned ResNum = 0, NumValues = N->getNumValues(); ResNum < NumValues; + ++ResNum) { + SetPromotedFloat(SDValue(N, ResNum), Res.getValue(ResNum)); + } + + return SDValue(); +} + // Explicit operation to reduce precision. Reduce the value to half precision // and promote it back to the legal type. 
SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { @@ -3148,6 +3206,10 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break; + case ISD::FSINCOS: + R = SoftPromoteHalfRes_FSINCOS(N); + break; + case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break; case ISD::ATOMIC_LOAD: R = SoftPromoteHalfRes_ATOMIC_LOAD(N); @@ -3304,6 +3366,27 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FSINCOS(SDNode *N) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Promote to the larger FP type. + Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op); + SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, NVT), Op); + + // Convert back to FP16 as an integer. 
+ ISD::NodeType Truncate = GetPromotionOpcode(NVT, OVT); + for (unsigned ResNum = 0, NumValues = N->getNumValues(); ResNum < NumValues; + ++ResNum) { + SDValue Trunc = DAG.getNode(Truncate, dl, MVT::i16, Res.getValue(ResNum)); + SetSoftPromotedHalf(SDValue(N, ResNum), Trunc); + } + + return SDValue(); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { EVT RVT = N->getValueType(0); bool IsStrict = N->isStrictFPOpcode(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 868da25ca8cb474..8d3458aaab9f865 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -597,6 +597,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FPOW(SDNode *N); SDValue SoftenFloatRes_ExpOp(SDNode *N); SDValue SoftenFloatRes_FFREXP(SDNode *N); + SDValue SoftenFloatRes_FSINCOS(SDNode *N); SDValue SoftenFloatRes_FREEZE(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); @@ -744,6 +745,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue PromoteFloatRes_FMAD(SDNode *N); SDValue PromoteFloatRes_ExpOp(SDNode *N); SDValue PromoteFloatRes_FFREXP(SDNode *N); + SDValue PromoteFloatRes_FSINCOS(SDNode *N); SDValue PromoteFloatRes_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_LOAD(SDNode *N); @@ -792,6 +794,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftPromoteHalfRes_FMAD(SDNode *N); SDValue SoftPromoteHalfRes_ExpOp(SDNode *N); SDValue SoftPromoteHalfRes_FFREXP(SDNode *N); + SDValue SoftPromoteHalfRes_FSINCOS(SDNode *N); SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N); SDValue SoftPromoteHalfRes_LOAD(SDNode *N); SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N); @@ -863,7 +866,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N); SDValue ScalarizeVecRes_FIX(SDNode *N); - 
SDValue ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo); + SDValue ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo); // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -917,7 +920,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo, SDValue &Lo, + SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -1068,6 +1072,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_ExpOp(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); + SDValue WidenVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo); + void ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode, + unsigned WidenResNo); // Widen Vector Operand. 
bool WidenVectorOperand(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index a8042fc3e7a69a2..c80da28b3dc34d5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -452,6 +452,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UMULO: case ISD::FCANONICALIZE: case ISD::FFREXP: + case ISD::FSINCOS: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 50e2a923699c8ad..5409ae7d9671cb0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -130,7 +130,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_ADDRSPACECAST(N); break; case ISD::FFREXP: - R = ScalarizeVecRes_FFREXP(N, ResNo); + case ISD::FSINCOS: + R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo); break; case ISD::ADD: case ISD::AND: @@ -276,7 +277,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { Op2, N->getFlags()); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo) { +SDValue +DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N, + unsigned ResNo) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexpected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); @@ -1253,7 +1256,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_ADDRSPACECAST(N, Lo, Hi); break; case ISD::FFREXP: - SplitVecRes_FFREXP(N, ResNo, Lo, Hi); + case ISD::FSINCOS: + SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi); break; case ISD::ANY_EXTEND: @@ -2615,8 +2619,10 @@ void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, 
DestAS); } -void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_UnaryOpWithTwoResults(SDNode *N, + unsigned ResNo, + SDValue &Lo, + SDValue &Hi) { SDLoc dl(N); auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1)); @@ -4429,6 +4435,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) { // Result Vector Widening //===----------------------------------------------------------------------===// +void DAGTypeLegalizer::ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode, + unsigned WidenResNo) { + unsigned NumResults = N->getNumValues(); + for (unsigned ResNo = 0; ResNo < NumResults; ResNo++) { + if (ResNo == WidenResNo) + continue; + EVT ResVT = N->getValueType(ResNo); + if (getTypeAction(ResVT) == TargetLowering::TypeWidenVector) { + SetWidenedVector(SDValue(N, ResNo), SDValue(WidenNode, ResNo)); + } else { + SDLoc DL(N); + SDValue ResVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, + SDValue(WidenNode, ResNo), + DAG.getVectorIdxConstant(0, DL)); + ReplaceValueWith(SDValue(N, ResNo), ResVal); + } + } +} + void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG)); @@ -4448,6 +4473,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) && TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); + if (N->getNumValues() > 1) + ReplaceOtherWidenResults(N, Res.getNode(), ResNo); return true; } return false; @@ -4752,6 +4779,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FSHR: Res = WidenVecRes_Ternary(N); break; + case ISD::FFREXP: + case ISD::FSINCOS: { + if (!unrollExpandedOp()) + Res = 
WidenVecRes_UnaryOpWithTwoResults(N, ResNo); + break; + } } // If Res is null, the sub-method took care of registering the result. @@ -5500,6 +5533,32 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } +SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N, + unsigned ResNo) { + EVT VT0 = N->getValueType(0); + EVT VT1 = N->getValueType(1); + + assert(VT0.isVector() && VT1.isVector() && + VT0.getVectorElementCount() == VT1.getVectorElementCount() && + "expected both results to be vectors of matching element count"); + + LLVMContext &Ctx = *DAG.getContext(); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + + EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo)); + ElementCount WidenEC = WidenVT.getVectorElementCount(); + + EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC); + EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC); + + SDNode *WidenNode = + DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp) + .getNode(); + + ReplaceOtherWidenResults(N, WidenNode, ResNo); + return SDValue(WidenNode, ResNo); +} + SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); return GetWidenedVector(WidenVec); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 1a86b3b51234d18..8f255cce1fe15d1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12533,8 +12533,15 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars1.push_back(EltOp.getValue(1)); } - SDValue Vec0 = getBuildVector(VT, dl, Scalars0); - SDValue Vec1 = getBuildVector(VT1, dl, Scalars1); + for (; i < ResNE; ++i) { + Scalars0.push_back(getUNDEF(EltVT)); + Scalars1.push_back(getUNDEF(EltVT1)); + } + + EVT VecVT = EVT::getVectorVT(*getContext(), 
EltVT, ResNE); + EVT VecVT1 = EVT::getVectorVT(*getContext(), EltVT1, ResNE); + SDValue Vec0 = getBuildVector(VecVT, dl, Scalars0); + SDValue Vec1 = getBuildVector(VecVT1, dl, Scalars1); return getMergeValues({Vec0, Vec1}, dl); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8450553743074c3..203e80e36b46d9e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6936,12 +6936,24 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; + case Intrinsic::sincos: case Intrinsic::frexp: { + unsigned Opcode; + switch (Intrinsic) { + default: + llvm_unreachable("unexpected intrinsic"); + case Intrinsic::sincos: + Opcode = ISD::FSINCOS; + break; + case Intrinsic::frexp: + Opcode = ISD::FFREXP; + break; + } SmallVector ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); SDVTList VTs = DAG.getVTList(ValueVTs); - setValue(&I, - DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0)))); + setValue( + &I, DAG.getNode(Opcode, sdl, VTs, getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::arithmetic_fence: { diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index cab0ed23577437f..5bcde0e1bbec88b 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -773,8 +773,9 @@ void TargetLoweringBase::initActions() { setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); // These library functions default to expand. - setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT, - Expand); + setOperationAction( + {ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP, ISD::FSINCOS}, VT, + Expand); // These operations default to expand for vector types. 
if (VT.isVector()) diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 9f6e5e5ab1421c8..0e29648a7a284fe 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -2063,6 +2063,7 @@ bool ARMTTIImpl::isLoweredToCall(const Function *F) { case Intrinsic::powi: case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::sincos: case Intrinsic::pow: case Intrinsic::log: case Intrinsic::log10: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll new file mode 100644 index 000000000000000..69cd6ce87b5c6b7 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s + +define { half, half } @test_sincos_f16(half %a) { + ; CHECK-LABEL: name: test_sincos_f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $h0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s16), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $h0 = COPY [[FSINCOS]](s16) + ; CHECK-NEXT: $h1 = COPY [[FSINCOS1]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0, implicit $h1 + %result = call { half, half } @llvm.sincos.f16(half %a) + ret { half, half } %result +} + +define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { + ; CHECK-LABEL: name: test_sincos_v2f16 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s16>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[UV]] + ; 
CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FSINCOS]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV2]](s16), [[UV3]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FSINCOS1]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[DEF]](s16), [[DEF]](s16) + ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $d1 = COPY [[BUILD_VECTOR1]](<4 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + ret { <2 x half>, <2 x half> } %result +} + +define { float, float } @test_sincos_f32(float %a) { + ; CHECK-LABEL: name: test_sincos_f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s32), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $s0 = COPY [[FSINCOS]](s32) + ; CHECK-NEXT: $s1 = COPY [[FSINCOS1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0, implicit $s1 + %result = call { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} + +define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { + ; CHECK-LABEL: name: test_sincos_v2f32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s32>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $d0 = COPY [[FSINCOS]](<2 x s32>) + ; CHECK-NEXT: $d1 = COPY [[FSINCOS1]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + ret { <2 x float>, <2 x float> } %result +} + 
+define { double, double } @test_sincos_f64(double %a) { + ; CHECK-LABEL: name: test_sincos_f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s64), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $d0 = COPY [[FSINCOS]](s64) + ; CHECK-NEXT: $d1 = COPY [[FSINCOS1]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 + %result = call { double, double } @llvm.sincos.f64(double %a) + ret { double, double } %result +} + +define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { + ; CHECK-LABEL: name: test_sincos_v2f64 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(<2 x s64>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $q0 = COPY [[FSINCOS]](<2 x s64>) + ; CHECK-NEXT: $q1 = COPY [[FSINCOS1]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} + +define { fp128, fp128 } @test_sincos_f128(fp128 %a) { + ; CHECK-LABEL: name: test_sincos_f128 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $q0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s128), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]] + ; CHECK-NEXT: $q0 = COPY [[FSINCOS]](s128) + ; CHECK-NEXT: $q1 = COPY [[FSINCOS1]](s128) + ; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 + %result = call { fp128, fp128 } @llvm.sincos.f16(fp128 %a) + ret { fp128, fp128 } %result +} + +define { float, float } @test_sincos_f32_afn(float %a) { + ; CHECK-LABEL: name: test_sincos_f32_afn + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = 
COPY $s0 + ; CHECK-NEXT: [[FSINCOS:%[0-9]+]]:_(s32), [[FSINCOS1:%[0-9]+]]:_ = afn G_FSINCOS [[COPY]] + ; CHECK-NEXT: $s0 = COPY [[FSINCOS]](s32) + ; CHECK-NEXT: $s1 = COPY [[FSINCOS1]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0, implicit $s1 + %result = call afn { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 0af60a503c5f1c6..6be99d0088f1cb9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -700,6 +700,9 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. the first uncovered imm index: 0, OK +# DEBUG-NEXT: G_FSINCOS (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FTAN (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. 
the first uncovered type index: 1, OK diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll new file mode 100644 index 000000000000000..e4cb8ed6eaf90f1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s + +define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { +; CHECK-LABEL: test_frexp_v2f16_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #36 +; CHECK-NEXT: add x19, sp, #36 +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: add x0, sp, #32 +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: fmov s0, s1 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: add x0, sp, #40 +; CHECK-NEXT: mov h1, v1.h[2] +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.h[1], v1.h[0] +; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fcvt h2, s0 +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: mov h1, v1.h[3] +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov v1.h[2], v2.h[0] +; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: fcvt h2, 
s0 +; CHECK-NEXT: ldr s1, [sp, #32] +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ld1 { v1.s }[1], [x19] +; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: mov v0.h[3], v2.h[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret + %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) + ret { <2 x half>, <2 x i32> } %result +} + +define { <3 x float>, <3 x i32> } @test_frexp_v3f16_v3i32(<3 x float> %a) { +; CHECK-LABEL: test_frexp_v3f16_v3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: add x0, sp, #56 +; CHECK-NEXT: add x19, sp, #56 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: add x0, sp, #60 +; CHECK-NEXT: add x20, sp, #60 +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov s0, v0.s[2] +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: ldr s1, [sp, #44] +; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v1.s }[1], [x19] +; 
CHECK-NEXT: mov v2.s[2], v0.s[0] +; CHECK-NEXT: ld1 { v1.s }[2], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret + %result = call { <3 x float>, <3 x i32> } @llvm.frexp.v3float.v3i32(<3 x float> %a) + ret { <3 x float>, <3 x i32> } %result +} diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll b/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll new file mode 100644 index 000000000000000..456b7f98974a9e1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=aarch64-gnu-linux -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s + +; REQUIRES: asserts + +define { float, float } @test_sincos_f32_afn(float %a) { +; CHECK-LABEL: Initial selection DAG: %bb.0 'test_sincos_f32_afn:' +; CHECK-NEXT: SelectionDAG has 9 nodes: +; CHECK-NEXT: t0: ch,glue = EntryToken +; CHECK-NEXT: t2: f32,ch = CopyFromReg t0, Register:f32 %0 +; CHECK-NEXT: t3: f32,f32 = fsincos afn t2 +; CHECK-NEXT: t5: ch,glue = CopyToReg t0, Register:f32 $s0, t3 +; CHECK-NEXT: t7: ch,glue = CopyToReg t5, Register:f32 $s1, t3:1, t5:1 +; CHECK-NEXT: t8: ch = AArch64ISD::RET_GLUE t7, Register:f32 $s0, Register:f32 $s1, t7:1 + %result = call afn { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll new file mode 100644 index 000000000000000..c5efc796e7a3c45 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -mtriple=aarch64-none-linux < %s | FileCheck -check-prefixes=NO-LIBCALL %s + +define { half, half } @test_sincos_f16(half %a) { +; CHECK-LABEL: test_sincos_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s1, s0, [sp, #8] +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: fcvt h1, s1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f16: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: fcvt s8, h0 +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fcvt h9, s0 +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fmov s1, s0 +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: fcvt h1, s1 +; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ret + %result = call { half, half } @llvm.sincos.f16(half %a) + ret { half, half } %result +} + +define half @test_sincos_f16_only_use_sin(half %a) { +; CHECK-LABEL: test_sincos_f16_only_use_sin: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #12] +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f16_only_use_sin: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 16 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: fcvt s0, h0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ret + %result = call { half, half } @llvm.sincos.f16(half %a) + %result.0 = extractvalue { half, half } %result, 0 + ret half %result.0 +} + +define half @test_sincos_f16_only_use_cos(half %a) { +; CHECK-LABEL: test_sincos_f16_only_use_cos: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr s0, [sp, #8] +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f16_only_use_cos: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 16 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: fcvt s0, h0 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ret + %result = call { half, half } @llvm.sincos.f16(half %a) + %result.1 = extractvalue { half, half } %result, 1 + ret half %result.1 +} + +define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { +; CHECK-LABEL: test_sincos_v2f16: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov h1, v0.h[1] +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add x0, sp, #36 +; CHECK-NEXT: add x1, sp, #32 +; CHECK-NEXT: fcvt s0, h1 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #60 +; CHECK-NEXT: add x1, sp, #56 +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s2, s0, [sp, #32] +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ldp s3, s1, [sp, #24] +; CHECK-NEXT: fcvt h4, s0 +; CHECK-NEXT: fcvt h2, s2 +; CHECK-NEXT: fcvt h0, s1 +; CHECK-NEXT: fcvt h1, s3 +; CHECK-NEXT: ldp s5, s3, [sp, #40] +; CHECK-NEXT: fcvt h3, s3 +; CHECK-NEXT: mov v0.h[1], v4.h[0] +; CHECK-NEXT: fcvt h4, s5 +; CHECK-NEXT: mov v1.h[1], v2.h[0] +; CHECK-NEXT: ldp s5, s2, [sp, #56] +; CHECK-NEXT: mov v0.h[2], 
v3.h[0] +; CHECK-NEXT: fcvt h2, s2 +; CHECK-NEXT: fcvt h3, s5 +; CHECK-NEXT: mov v1.h[2], v4.h[0] +; CHECK-NEXT: mov v0.h[3], v2.h[0] +; CHECK-NEXT: mov v1.h[3], v3.h[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f16: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #80 +; NO-LIBCALL-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 80 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: .cfi_offset b10, -40 +; NO-LIBCALL-NEXT: .cfi_offset b11, -48 +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: mov h1, v0.h[1] +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fcvt s8, h1 +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: fcvt s9, h1 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: mov h1, v1.h[2] +; NO-LIBCALL-NEXT: fcvt s10, h1 +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s10 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: mov h1, v1.h[3] +; NO-LIBCALL-NEXT: fcvt s11, h1 +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[2], 
v0.h[0] +; NO-LIBCALL-NEXT: fmov s0, s11 +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[3], v0.h[0] +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0] +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s10 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fcvt h0, s0 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0] +; NO-LIBCALL-NEXT: fmov s0, s11 +; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fmov s1, s0 +; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: fcvt h2, s1 +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.h[3], v2.h[0] +; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1 +; NO-LIBCALL-NEXT: add sp, sp, #80 +; NO-LIBCALL-NEXT: ret + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + ret { <2 x half>, <2 x half> } %result +} + +define { float, float } @test_sincos_f32(float %a) { +; CHECK-LABEL: test_sincos_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #12 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s1, s0, [sp, #8] +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f32: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: fmov s8, s0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: fmov s9, s0 +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: fmov s1, s0 +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ret + %result = call { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} + +define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) { +; CHECK-LABEL: test_sincos_v3f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: add x0, sp, #20 +; CHECK-NEXT: add x1, sp, #16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; 
CHECK-NEXT: add x19, sp, #28 +; CHECK-NEXT: add x20, sp, #24 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: add x21, sp, #44 +; CHECK-NEXT: add x22, sp, #40 +; CHECK-NEXT: mov s0, v0.s[2] +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s1, s0, [sp, #16] +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v0.s }[1], [x19] +; CHECK-NEXT: ld1 { v1.s }[1], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ld1 { v0.s }[2], [x21] +; CHECK-NEXT: ld1 { v1.s }[2], [x22] +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v3f32: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #80 +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 80 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: mov s8, v0.s[1] +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: mov s9, v0.s[2] +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: bl sinf +; 
NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: mov v1.s[2], v0.s[0] +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s9 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fmov s2, s0 +; NO-LIBCALL-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload +; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.s[2], v2.s[0] +; NO-LIBCALL-NEXT: add sp, sp, #80 +; NO-LIBCALL-NEXT: ret + %result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a) + ret { <3 x float>, <3 x float> } %result +} + +define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { +; CHECK-LABEL: test_sincos_v2f32: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: add x0, sp, #44 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded 
Reload +; CHECK-NEXT: add x0, sp, #28 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: add x19, sp, #28 +; CHECK-NEXT: add x20, sp, #24 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldp s1, s0, [sp, #40] +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v0.s }[1], [x19] +; CHECK-NEXT: ld1 { v1.s }[1], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f32: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #64 +; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 64 +; NO-LIBCALL-NEXT: .cfi_offset w30, -8 +; NO-LIBCALL-NEXT: .cfi_offset b8, -16 +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: mov s8, v0.s[1] +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl sinf +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov s0, s8 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0 +; NO-LIBCALL-NEXT: bl cosf +; NO-LIBCALL-NEXT: fmov s1, s0 +; 
NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: mov v1.s[1], v2.s[0] +; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1 +; NO-LIBCALL-NEXT: add sp, sp, #64 +; NO-LIBCALL-NEXT: ret + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + ret { <2 x float>, <2 x float> } %result +} + +define { double, double } @test_sincos_f64(double %a) { +; CHECK-LABEL: test_sincos_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: add x0, sp, #24 +; CHECK-NEXT: add x1, sp, #8 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr d0, [sp, #24] +; CHECK-NEXT: ldr d1, [sp, #8] +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_f64: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! 
// 16-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32 +; NO-LIBCALL-NEXT: .cfi_offset w30, -16 +; NO-LIBCALL-NEXT: .cfi_offset b8, -24 +; NO-LIBCALL-NEXT: .cfi_offset b9, -32 +; NO-LIBCALL-NEXT: fmov d8, d0 +; NO-LIBCALL-NEXT: bl sin +; NO-LIBCALL-NEXT: fmov d9, d0 +; NO-LIBCALL-NEXT: fmov d0, d8 +; NO-LIBCALL-NEXT: bl cos +; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: fmov d1, d0 +; NO-LIBCALL-NEXT: fmov d0, d9 +; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload +; NO-LIBCALL-NEXT: ret + %result = call { double, double } @llvm.sincos.f64(double %a) + ret { double, double } %result +} + +define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_sincos_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: add x0, sp, #56 +; CHECK-NEXT: add x1, sp, #40 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: add x0, sp, #32 +; CHECK-NEXT: add x1, sp, #24 +; CHECK-NEXT: add x19, sp, #32 +; CHECK-NEXT: add x20, sp, #24 +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldr d0, [sp, #56] +; CHECK-NEXT: ldr d1, [sp, #40] +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: ld1 { v0.d }[1], [x19] +; CHECK-NEXT: ld1 { v1.d }[1], [x20] +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret +; +; NO-LIBCALL-LABEL: test_sincos_v2f64: +; NO-LIBCALL: // %bb.0: +; NO-LIBCALL-NEXT: sub sp, sp, #64 +; 
NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 64 +; NO-LIBCALL-NEXT: .cfi_offset w30, -8 +; NO-LIBCALL-NEXT: .cfi_offset b8, -16 +; NO-LIBCALL-NEXT: mov d8, v0.d[1] +; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov d0, d8 +; NO-LIBCALL-NEXT: bl sin +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: bl sin +; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0] +; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: fmov d0, d8 +; NO-LIBCALL-NEXT: bl cos +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0 +; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill +; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; NO-LIBCALL-NEXT: bl cos +; NO-LIBCALL-NEXT: fmov d1, d0 +; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload +; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; NO-LIBCALL-NEXT: mov v1.d[1], v2.d[0] +; NO-LIBCALL-NEXT: add sp, sp, #64 +; NO-LIBCALL-NEXT: ret + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll new file mode 100644 index 000000000000000..9628405df6bcb9e --- /dev/null +++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll @@ -0,0 +1,223 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc 
-mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s + +define { half, half } @test_sincos_f16(half %a) { +; CHECK-LABEL: test_sincos_f16: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #4] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: ldr r0, [sp] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r4, pc} + %result = call { half, half } @llvm.sincos.f16(half %a) + ret { half, half } %result +} + +define half @test_sincos_f16_only_use_sin(half %a) { +; CHECK-LABEL: test_sincos_f16_only_use_sin: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #4] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r7, pc} + %result = call { half, half } @llvm.sincos.f16(half %a) + %result.0 = extractvalue { half, half } %result, 0 + ret half %result.0 +} + +define half @test_sincos_f16_only_use_cos(half %a) { +; CHECK-LABEL: test_sincos_f16_only_use_cos: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r7, pc} + %result = call { half, half } @llvm.sincos.f16(half %a) + %result.1 = extractvalue { half, half } %result, 1 + ret half %result.1 +} + +define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { +; CHECK-LABEL: test_sincos_v2f16: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: sub sp, #24 +; 
CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #12 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl __gnu_h2f_ieee +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldr r0, [sp, #12] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: ldr r1, [sp, #4] +; CHECK-NEXT: strh.w r0, [sp, #22] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: strh.w r0, [sp, #20] +; CHECK-NEXT: add r0, sp, #20 +; CHECK-NEXT: vld1.32 {d8[0]}, [r0:32] +; CHECK-NEXT: ldr r0, [sp, #8] +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: ldr r1, [sp] +; CHECK-NEXT: strh.w r0, [sp, #18] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl __gnu_f2h_ieee +; CHECK-NEXT: strh.w r0, [sp, #16] +; CHECK-NEXT: add r0, sp, #16 +; CHECK-NEXT: vmovl.u16 q9, d8 +; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vmov.32 r0, d18[0] +; CHECK-NEXT: vmov.32 r1, d18[1] +; CHECK-NEXT: vmov.32 r2, d16[0] +; CHECK-NEXT: vmov.32 r3, d16[1] +; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r4, pc} + %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) + ret { <2 x half>, <2 x half> } %result +} + +define { float, float } @test_sincos_f32(float %a) { +; CHECK-LABEL: test_sincos_f32: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: add r1, sp, #4 +; CHECK-NEXT: mov r2, sp +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: ldrd r1, r0, [sp], #8 +; CHECK-NEXT: pop {r7, pc} + %result = call { float, float } @llvm.sincos.f32(float %a) + ret { float, float } %result +} + +define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { +; CHECK-LABEL: test_sincos_v2f32: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vpush {d8} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: add r1, sp, #4 +; 
CHECK-NEXT: mov r2, sp +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: add r1, sp, #12 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: bl sincosf +; CHECK-NEXT: vldr s1, [sp, #4] +; CHECK-NEXT: vldr s3, [sp] +; CHECK-NEXT: vldr s0, [sp, #12] +; CHECK-NEXT: vldr s2, [sp, #8] +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vpop {d8} +; CHECK-NEXT: pop {r7, pc} + %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) + ret { <2 x float>, <2 x float> } %result +} + +define { double, double } @test_sincos_f64(double %a) { +; CHECK-LABEL: test_sincos_f64: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldrd r0, r1, [sp, #8] +; CHECK-NEXT: ldrd r2, r3, [sp], #16 +; CHECK-NEXT: pop {r7, pc} + %result = call { double, double } @llvm.sincos.f64(double %a) + ret { double, double } %result +} + +define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_sincos_v2f64: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: mov r12, r2 +; CHECK-NEXT: add r2, sp, #24 +; CHECK-NEXT: add r3, sp, #16 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: bl sincos +; CHECK-NEXT: ldrd r0, r1, [sp, #40] +; CHECK-NEXT: add r2, sp, #8 +; CHECK-NEXT: mov r3, sp +; CHECK-NEXT: bl sincos +; CHECK-NEXT: vldr d19, [sp, #8] +; CHECK-NEXT: vldr d18, [sp, #24] +; CHECK-NEXT: vldr d17, [sp] +; CHECK-NEXT: vldr d16, [sp, #16] +; CHECK-NEXT: vst1.64 {d18, d19}, [r4]! 
+; CHECK-NEXT: vst1.64 {d16, d17}, [r4] +; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: pop {r4, pc} + %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) + ret { <2 x double>, <2 x double> } %result +} + +define { fp128, fp128 } @test_sincos_f128(fp128 %a) { +; CHECK-LABEL: test_sincos_f128: +; CHECK: @ %bb.0: +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: sub sp, #40 +; CHECK-NEXT: mov r12, r3 +; CHECK-NEXT: ldr r3, [sp, #56] +; CHECK-NEXT: add.w lr, sp, #8 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: add r0, sp, #24 +; CHECK-NEXT: strd r0, lr, [sp] +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: bl sincosl +; CHECK-NEXT: ldrd r2, r3, [sp, #16] +; CHECK-NEXT: ldrd r12, r1, [sp, #8] +; CHECK-NEXT: str r3, [r4, #28] +; CHECK-NEXT: ldrd r3, r5, [sp, #32] +; CHECK-NEXT: ldrd lr, r0, [sp, #24] +; CHECK-NEXT: strd r1, r2, [r4, #20] +; CHECK-NEXT: add.w r1, r4, #8 +; CHECK-NEXT: stm.w r1, {r3, r5, r12} +; CHECK-NEXT: strd lr, r0, [r4] +; CHECK-NEXT: add sp, #40 +; CHECK-NEXT: pop {r4, r5, r7, pc} + %result = call { fp128, fp128 } @llvm.sincos.f16(fp128 %a) + ret { fp128, fp128 } %result +} From 32aa782ea297b3e0ec090cf8fc0055d00c99d24b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Oct 2024 10:57:36 +0000 Subject: [PATCH 268/425] [PowerPC] copysignl.ll - regenerate to reduce the diff in #111269 --- llvm/test/CodeGen/PowerPC/copysignl.ll | 138 ++++++++++++++++++------- 1 file changed, 103 insertions(+), 35 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/copysignl.ll b/llvm/test/CodeGen/PowerPC/copysignl.ll index 427826daa2c6382..40ed3d803094f45 100644 --- a/llvm/test/CodeGen/PowerPC/copysignl.ll +++ b/llvm/test/CodeGen/PowerPC/copysignl.ll @@ -1,82 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=-vsx < %s | FileCheck %s ; RUN: llc 
-verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s -check-prefix=CHECK-VSX target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" define double @foo_d_ll(ppc_fp128 %a, ppc_fp128 %b) #0 { +; CHECK-LABEL: foo_d_ll: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcpsgn 1, 3, 1 +; CHECK-NEXT: blr +; +; CHECK-VSX-LABEL: foo_d_ll: +; CHECK-VSX: # %bb.0: # %entry +; CHECK-VSX-NEXT: xscpsgndp 1, 3, 1 +; CHECK-VSX-NEXT: blr entry: %call = tail call ppc_fp128 @copysignl(ppc_fp128 %a, ppc_fp128 %b) #0 %conv = fptrunc ppc_fp128 %call to double ret double %conv - -; CHECK-LABEL: @foo_d_ll -; CHECK: fcpsgn 1, 3, 1 -; CHECK: blr -; CHECK-VSX-LABEL: @foo_d_ll -; CHECK-VSX: xscpsgndp 1, 3, 1 -; CHECK-VSX: blr } declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0 define double @foo_dl(double %a, ppc_fp128 %b) #0 { +; CHECK-LABEL: foo_dl: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fcpsgn 1, 2, 1 +; CHECK-NEXT: blr +; +; CHECK-VSX-LABEL: foo_dl: +; CHECK-VSX: # %bb.0: # %entry +; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1 +; CHECK-VSX-NEXT: blr entry: %conv = fptrunc ppc_fp128 %b to double %call = tail call double @copysign(double %a, double %conv) #0 ret double %call - -; CHECK-LABEL: @foo_dl -; CHECK: fcpsgn 1, 2, 1 -; CHECK: blr -; CHECK-VSX-LABEL: @foo_dl -; CHECK-VSX: xscpsgndp 1, 2, 1 -; CHECK-VSX: blr } declare double @copysign(double, double) #0 define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 { +; CHECK-LABEL: foo_ll: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -112(1) +; CHECK-NEXT: fmr 3, 2 +; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha +; CHECK-NEXT: std 0, 128(1) +; CHECK-NEXT: lfs 2, .LCPI2_0@toc@l(3) +; CHECK-NEXT: bl copysignl +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 112 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +; +; CHECK-VSX-LABEL: foo_ll: +; 
CHECK-VSX: # %bb.0: # %entry +; CHECK-VSX-NEXT: mflr 0 +; CHECK-VSX-NEXT: stdu 1, -112(1) +; CHECK-VSX-NEXT: fmr 3, 2 +; CHECK-VSX-NEXT: xxlxor 2, 2, 2 +; CHECK-VSX-NEXT: std 0, 128(1) +; CHECK-VSX-NEXT: bl copysignl +; CHECK-VSX-NEXT: nop +; CHECK-VSX-NEXT: addi 1, 1, 112 +; CHECK-VSX-NEXT: ld 0, 16(1) +; CHECK-VSX-NEXT: mtlr 0 +; CHECK-VSX-NEXT: blr entry: %conv = fpext double %a to ppc_fp128 %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %b) #0 ret ppc_fp128 %call - -; CHECK-LABEL: @foo_ll -; CHECK: bl copysignl -; CHECK: blr -; CHECK-VSX-LABEL: @foo_ll -; CHECK-VSX: bl copysignl -; CHECK-VSX: blr } define ppc_fp128 @foo_ld(double %a, double %b) #0 { +; CHECK-LABEL: foo_ld: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -112(1) +; CHECK-NEXT: fmr 3, 2 +; CHECK-NEXT: addis 3, 2, .LCPI3_0@toc@ha +; CHECK-NEXT: std 0, 128(1) +; CHECK-NEXT: lfs 2, .LCPI3_0@toc@l(3) +; CHECK-NEXT: bl copysignl +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 112 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +; +; CHECK-VSX-LABEL: foo_ld: +; CHECK-VSX: # %bb.0: # %entry +; CHECK-VSX-NEXT: mflr 0 +; CHECK-VSX-NEXT: stdu 1, -112(1) +; CHECK-VSX-NEXT: fmr 3, 2 +; CHECK-VSX-NEXT: xxlxor 2, 2, 2 +; CHECK-VSX-NEXT: std 0, 128(1) +; CHECK-VSX-NEXT: bl copysignl +; CHECK-VSX-NEXT: nop +; CHECK-VSX-NEXT: addi 1, 1, 112 +; CHECK-VSX-NEXT: ld 0, 16(1) +; CHECK-VSX-NEXT: mtlr 0 +; CHECK-VSX-NEXT: blr entry: %conv = fpext double %a to ppc_fp128 %conv1 = fpext double %b to ppc_fp128 %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0 ret ppc_fp128 %call - -; CHECK-LABEL: @foo_ld -; CHECK: bl copysignl -; CHECK: blr -; CHECK-VSX-LABEL: @foo_ld -; CHECK-VSX: bl copysignl -; CHECK-VSX: blr } define ppc_fp128 @foo_lf(double %a, float %b) #0 { +; CHECK-LABEL: foo_lf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -112(1) +; CHECK-NEXT: fmr 3, 2 +; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha +; 
CHECK-NEXT: std 0, 128(1) +; CHECK-NEXT: lfs 2, .LCPI4_0@toc@l(3) +; CHECK-NEXT: bl copysignl +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 112 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +; +; CHECK-VSX-LABEL: foo_lf: +; CHECK-VSX: # %bb.0: # %entry +; CHECK-VSX-NEXT: mflr 0 +; CHECK-VSX-NEXT: stdu 1, -112(1) +; CHECK-VSX-NEXT: fmr 3, 2 +; CHECK-VSX-NEXT: xxlxor 2, 2, 2 +; CHECK-VSX-NEXT: std 0, 128(1) +; CHECK-VSX-NEXT: bl copysignl +; CHECK-VSX-NEXT: nop +; CHECK-VSX-NEXT: addi 1, 1, 112 +; CHECK-VSX-NEXT: ld 0, 16(1) +; CHECK-VSX-NEXT: mtlr 0 +; CHECK-VSX-NEXT: blr entry: %conv = fpext double %a to ppc_fp128 %conv1 = fpext float %b to ppc_fp128 %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0 ret ppc_fp128 %call - -; CHECK-LABEL: @foo_lf -; CHECK: bl copysignl -; CHECK: blr -; CHECK-VSX-LABEL: @foo_lf -; CHECK-VSX: bl copysignl -; CHECK-VSX: blr } attributes #0 = { nounwind readnone } From f537792f3f4977c8bc887b17ffc25e93833e7d0d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Oct 2024 11:00:35 +0000 Subject: [PATCH 269/425] [X86] Refactor the SSE intrinsics constexpr tests to simplify future expansion (#112578) I'm hoping to make a large proportion of the SSE/AVX intrinsics usable in constant expressions - eventually anything that doesn't touch memory or system settings - making it much easier to utilize SSE/AVX intrinsics in various math libraries etc. My initial implementation placed the tests at the end of the test file, similar to how smaller files already handle their tests. However, what I'm finding is that this approach doesn't scale when trying to track coverage of so many intrinsics - many keep getting missed, and it gets messy; so what I'm proposing is to instead keep each intrinsic's generic IR test and its constexpr tests together to make them easier to track together, wrapping the static_assert inside a macro to disable on C and pre-C++11 tests. 
I'm open to alternative suggestions before I invest too much time getting this work done :) --- clang/test/CodeGen/X86/builtin_test_helpers.h | 25 ++++ clang/test/CodeGen/X86/sse-builtins.c | 127 +++++----------- clang/test/CodeGen/X86/sse2-builtins.c | 139 ++++-------------- clang/test/CodeGen/X86/sse3-builtins.c | 23 +-- 4 files changed, 95 insertions(+), 219 deletions(-) create mode 100644 clang/test/CodeGen/X86/builtin_test_helpers.h diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h new file mode 100644 index 000000000000000..043b6ecbc69f18f --- /dev/null +++ b/clang/test/CodeGen/X86/builtin_test_helpers.h @@ -0,0 +1,25 @@ +/* Helper methods for builtin intrinsic tests */ + +#include + +#if defined(__cplusplus) && (__cplusplus >= 201103L) + +constexpr bool match_m128(__m128 v, float x, float y, float z, float w) { + return v[0] == x && v[1] == y && v[2] == z && v[3] == w; +} + +constexpr bool match_m128d(__m128d v, double x, double y) { + return v[0] == x && v[1] == y; +} + +constexpr bool match_m128i(__m128i v, unsigned long long x, unsigned long long y) { + return v[0] == x && v[1] == y; +} + +#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__) + +#else + +#define TEST_CONSTEXPR(...) 
+ +#endif \ No newline at end of file diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 391e049a6ae3ef0..f779ab07a266408 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -5,6 +5,7 @@ #include +#include "builtin_test_helpers.h" // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -13,6 +14,7 @@ __m128 test_mm_add_ps(__m128 A, __m128 B) { // CHECK: fadd <4 x float> return _mm_add_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f)); __m128 test_mm_add_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_add_ss @@ -22,12 +24,14 @@ __m128 test_mm_add_ss(__m128 A, __m128 B) { // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_add_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f)); __m128 test_mm_and_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_and_ps // CHECK: and <4 x i32> return _mm_and_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -0.0f, -0.0f, +0.0f, +7.0f)); __m128 test_mm_andnot_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_andnot_ps @@ -35,6 +39,7 @@ __m128 test_mm_andnot_ps(__m128 A, __m128 B) { // CHECK: and <4 x i32> return _mm_andnot_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, +0.0f, +0.0f)); __m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) { // CHECK-LABEL: test_mm_cmp_ps_eq_oq @@ -322,6 +327,15 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) { // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_cvtsi32_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 
42), +42.0f, +0.0f, +2.0f, +4.0f)); + +__m128 test_mm_cvt_si2ss(__m128 A, int B) { + // CHECK-LABEL: test_mm_cvt_si2ss + // CHECK: sitofp i32 %{{.*}} to float + // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 + return _mm_cvt_si2ss(A, B); +} +TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f)); #ifdef __x86_64__ __m128 test_mm_cvtsi64_ss(__m128 A, long long B) { @@ -330,6 +344,7 @@ __m128 test_mm_cvtsi64_ss(__m128 A, long long B) { // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_cvtsi64_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f)); #endif float test_mm_cvtss_f32(__m128 A) { @@ -337,6 +352,7 @@ float test_mm_cvtss_f32(__m128 A) { // CHECK: extractelement <4 x float> %{{.*}}, i32 0 return _mm_cvtss_f32(A); } +TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f); int test_mm_cvtss_si32(__m128 A) { // CHECK-LABEL: test_mm_cvtss_si32 @@ -377,6 +393,7 @@ __m128 test_mm_div_ps(__m128 A, __m128 B) { // CHECK: fdiv <4 x float> return _mm_div_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f)); __m128 test_mm_div_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_div_ss @@ -386,6 +403,7 @@ __m128 test_mm_div_ss(__m128 A, __m128 B) { // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_div_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f)); unsigned int test_MM_GET_EXCEPTION_MASK(void) { // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK @@ -517,18 +535,21 @@ __m128 test_mm_move_ss(__m128 A, __m128 B) { // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_move_ss(A, B); } 
+TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f)); __m128 test_mm_movehl_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_movehl_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_movehl_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f)); __m128 test_mm_movelh_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_movelh_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_movelh_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f)); int test_mm_movemask_ps(__m128 A) { // CHECK-LABEL: test_mm_movemask_ps @@ -541,6 +562,7 @@ __m128 test_mm_mul_ps(__m128 A, __m128 B) { // CHECK: fmul <4 x float> return _mm_mul_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f)); __m128 test_mm_mul_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_mul_ss @@ -550,12 +572,14 @@ __m128 test_mm_mul_ss(__m128 A, __m128 B) { // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_mul_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f)); __m128 test_mm_or_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_or_ps // CHECK: or <4 x i32> return _mm_or_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f)); void test_mm_prefetch(char const* p) { // CHECK-LABEL: test_mm_prefetch @@ -628,6 +652,7 @@ __m128 test_mm_set_ps(float A, float B, float C, float D) { // CHECK: insertelement <4 x float> {{.*}}, float 
{{.*}}, i32 3 return _mm_set_ps(A, B, C, D); } +TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +.0f)); __m128 test_mm_set_ps1(float A) { // CHECK-LABEL: test_mm_set_ps1 @@ -637,6 +662,7 @@ __m128 test_mm_set_ps1(float A) { // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_set_ps1(A); } +TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f)); void test_MM_SET_ROUNDING_MODE(unsigned int A) { // CHECK-LABEL: test_MM_SET_ROUNDING_MODE @@ -657,6 +683,7 @@ __m128 test_mm_set_ss(float A) { // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3 return _mm_set_ss(A); } +TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f)); __m128 test_mm_set1_ps(float A) { // CHECK-LABEL: test_mm_set1_ps @@ -666,6 +693,7 @@ __m128 test_mm_set1_ps(float A) { // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_set1_ps(A); } +TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f)); void test_mm_setcsr(unsigned int A) { // CHECK-LABEL: test_mm_setcsr @@ -682,12 +710,14 @@ __m128 test_mm_setr_ps(float A, float B, float C, float D) { // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3 return _mm_setr_ps(A, B, C, D); } +TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f)); __m128 test_mm_setzero_ps(void) { // CHECK-LABEL: test_mm_setzero_ps // CHECK: store <4 x float> zeroinitializer return _mm_setzero_ps(); } +TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f)); void test_mm_sfence(void) { // CHECK-LABEL: test_mm_sfence @@ -787,6 +817,7 @@ __m128 test_mm_sub_ps(__m128 A, __m128 B) { // CHECK: fsub <4 x float> return _mm_sub_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f)); __m128 test_mm_sub_ss(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_sub_ss @@ -796,6 
+827,7 @@ __m128 test_mm_sub_ss(__m128 A, __m128 B) { // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0 return _mm_sub_ss(A, B); } +TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f)); void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) { // CHECK-LABEL: test_MM_TRANSPOSE4_PS @@ -857,107 +889,18 @@ __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) { // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_unpackhi_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f)); __m128 test_mm_unpacklo_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_unpacklo_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_unpacklo_ps(A, B); } +TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f)); __m128 test_mm_xor_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_xor_ps // CHECK: xor <4 x i32> return _mm_xor_ps(A, B); } - -// Test constexpr handling. 
-#if defined(__cplusplus) && (__cplusplus >= 201103L) - -void test_constexpr() { - constexpr __m128 k1 {+1.0f,+0.0f,+2.0f,+4.0f}; - constexpr __m128 k2 {+8.0f,+4.0f,+2.0f,+1.0f}; - constexpr __m128 k3 {-4.0f,-5.0f,+6.0f,+7.0f}; - constexpr __m128 k4 {+0.0f,-0.0f,-0.0f,+0.0f}; - - constexpr __m128 v_mm_set_ss = _mm_set_ss(1.0f); - static_assert(v_mm_set_ss[0] == +1.0f && v_mm_set_ss[1] == +0.0f && v_mm_set_ss[2] == +0.0f && v_mm_set_ss[3] == +0.0f); - - constexpr __m128 v_mm_set1_ps = _mm_set1_ps(2.0f); - static_assert(v_mm_set1_ps[0] == +2.0f && v_mm_set1_ps[1] == +2.0f && v_mm_set1_ps[2] == +2.0f && v_mm_set1_ps[3] == +2.0f); - - constexpr __m128 v_mm_set_ps1 = _mm_set_ps1(-2.0f); - static_assert(v_mm_set_ps1[0] == -2.0f && v_mm_set_ps1[1] == -2.0f && v_mm_set_ps1[2] == -2.0f && v_mm_set_ps1[3] == -2.0f); - - constexpr __m128 v_mm_set_ps = _mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f); - static_assert(v_mm_set_ps[0] == +3.0f && v_mm_set_ps[1] == +2.0f && v_mm_set_ps[2] == +1.0f && v_mm_set_ps[3] == +0.0f); - - constexpr __m128 v_mm_setr_ps = _mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f); - static_assert(v_mm_setr_ps[0] == +0.0f && v_mm_setr_ps[1] == +1.0f && v_mm_setr_ps[2] == +2.0f && v_mm_setr_ps[3] == +3.0f); - - constexpr __m128 v_mm_setzero_ps = _mm_setzero_ps(); - static_assert(v_mm_setzero_ps[0] == +0.0f && v_mm_setzero_ps[1] == +0.0f && v_mm_setzero_ps[2] == +0.0f && v_mm_setzero_ps[3] == +0.0f); - - constexpr __m128 v_mm_add_ss = _mm_add_ss(k1, k2); - static_assert(v_mm_add_ss[0] == +9.0f && v_mm_add_ss[1] == +0.0f && v_mm_add_ss[2] == +2.0f && v_mm_add_ss[3] == +4.0f); - - constexpr __m128 v_mm_add_ps = _mm_add_ps(k1, k2); - static_assert(v_mm_add_ps[0] == +9.0f && v_mm_add_ps[1] == +4.0f && v_mm_add_ps[2] == +4.0f && v_mm_add_ps[3] == +5.0f); - - constexpr __m128 v_mm_sub_ss = _mm_sub_ss(k1, k2); - static_assert(v_mm_sub_ss[0] == -7.0f && v_mm_sub_ss[1] == +0.0f && v_mm_sub_ss[2] == +2.0f && v_mm_sub_ss[3] == +4.0f); - - constexpr __m128 v_mm_sub_ps = _mm_sub_ps(k1, 
k2); - static_assert(v_mm_sub_ps[0] == -7.0f && v_mm_sub_ps[1] == -4.0f && v_mm_sub_ps[2] == +0.0f && v_mm_sub_ps[3] == +3.0f); - - constexpr __m128 v_mm_mul_ss = _mm_mul_ss(k1, k2); - static_assert(v_mm_mul_ss[0] == +8.0f && v_mm_mul_ss[1] == +0.0f && v_mm_mul_ss[2] == +2.0f && v_mm_mul_ss[3] == +4.0f); - - constexpr __m128 v_mm_mul_ps = _mm_mul_ps(k1, k2); - static_assert(v_mm_mul_ps[0] == +8.0f && v_mm_mul_ps[1] == +0.0f && v_mm_mul_ps[2] == +4.0f && v_mm_mul_ps[3] == +4.0f); - - constexpr __m128 v_mm_div_ss = _mm_div_ss(k1, k2); - static_assert(v_mm_div_ss[0] == +0.125f && v_mm_div_ss[1] == +0.0f && v_mm_div_ss[2] == +2.0f && v_mm_div_ss[3] == +4.0f); - - constexpr __m128 v_mm_div_ps = _mm_div_ps(k1, k2); - static_assert(v_mm_div_ps[0] == +0.125f && v_mm_div_ps[1] == +0.0f && v_mm_div_ps[2] == +1.0f && v_mm_div_ps[3] == +4.0f); - - constexpr __m128 v_mm_and_ps = _mm_and_ps(k3, k4); - static_assert(v_mm_and_ps[0] == +0.0f && v_mm_and_ps[1] == +0.0f && v_mm_and_ps[2] == +0.0f && v_mm_and_ps[3] == +0.0f); - - constexpr __m128 v_mm_andnot_ps = _mm_andnot_ps(k3, k4); - static_assert(v_mm_andnot_ps[0] == +0.0f && v_mm_andnot_ps[1] == +0.0f && v_mm_andnot_ps[2] == +0.0f && v_mm_andnot_ps[3] == +0.0f); - - constexpr __m128 v_mm_or_ps = _mm_or_ps(k3, k4); - static_assert(v_mm_or_ps[0] == -4.0f && v_mm_or_ps[1] == -5.0f && v_mm_or_ps[2] == -6.0f && v_mm_or_ps[3] == +7.0f); - - constexpr __m128 v_mm_xor_ps = _mm_xor_ps(k3, k4); - static_assert(v_mm_xor_ps[0] == -4.0f && v_mm_xor_ps[1] == +5.0f && v_mm_xor_ps[2] == -6.0f && v_mm_xor_ps[3] == +7.0f); - - constexpr __m128 v_mm_unpackhi_ps = _mm_unpackhi_ps(k1, k2); - static_assert(v_mm_unpackhi_ps[0] == +2.0f && v_mm_unpackhi_ps[1] == +2.0f && v_mm_unpackhi_ps[2] == +4.0f && v_mm_unpackhi_ps[3] == +1.0f); - - constexpr __m128 v_mm_unpacklo_ps = _mm_unpacklo_ps(k1, k2); - static_assert(v_mm_unpacklo_ps[0] == +1.0f && v_mm_unpacklo_ps[1] == +8.0f && v_mm_unpacklo_ps[2] == +0.0f && v_mm_unpacklo_ps[3] == +4.0f); - - constexpr 
__m128 v_mm_move_ss = _mm_move_ss(k1, k2); - static_assert(v_mm_move_ss[0] == +8.0f && v_mm_move_ss[1] == +0.0f && v_mm_move_ss[2] == +2.0f && v_mm_move_ss[3] == +4.0f); - - constexpr __m128 v_mm_movehl_ps = _mm_movehl_ps(k1, k2); - static_assert(v_mm_movehl_ps[0] == +2.0f && v_mm_movehl_ps[1] == +1.0f && v_mm_movehl_ps[2] == +2.0f && v_mm_movehl_ps[3] == +4.0f); - - constexpr __m128 v_mm_movelh_ps = _mm_movelh_ps(k1, k2); - static_assert(v_mm_movelh_ps[0] == +1.0f && v_mm_movelh_ps[1] == +0.0f && v_mm_movelh_ps[2] == +8.0f && v_mm_movelh_ps[3] == +4.0f); - - constexpr __m128 v_mm_cvtsi32_ss = _mm_cvtsi32_ss(k1, 42); - static_assert(v_mm_cvtsi32_ss[0] == 42.0f && v_mm_cvtsi32_ss[1] == +0.0f && v_mm_cvtsi32_ss[2] == +2.0f && v_mm_cvtsi32_ss[3] == +4.0f); - - constexpr __m128 v_mm_cvt_si2ss = _mm_cvt_si2ss(k2, -99); - static_assert(v_mm_cvt_si2ss[0] == -99.0f && v_mm_cvt_si2ss[1] == +4.0f && v_mm_cvt_si2ss[2] == +2.0f && v_mm_cvt_si2ss[3] == +1.0f); - - constexpr __m128 v_mm_cvtsi64_ss = _mm_cvtsi64_ss(k3, 555); - static_assert(v_mm_cvtsi64_ss[0] == 555.0f && v_mm_cvtsi64_ss[1] == -5.0f && v_mm_cvtsi64_ss[2] == +6.0f && v_mm_cvtsi64_ss[3] == +7.0f); - - static_assert(_mm_cvtss_f32(k2) == +8.0f); -} - -#endif \ No newline at end of file +TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f)); diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 0603ca5f78b6a12..4287d3d4b5ec4eb 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -11,6 +11,7 @@ #include +#include "builtin_test_helpers.h" // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -43,6 +44,7 @@ __m128d test_mm_add_pd(__m128d A, __m128d B) { // CHECK: fadd <2 x double> return _mm_add_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_add_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, 
-8.0)); __m128d test_mm_add_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_add_sd @@ -52,6 +54,7 @@ __m128d test_mm_add_sd(__m128d A, __m128d B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_add_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_add_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -3.0)); __m128i test_mm_adds_epi8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_adds_epi8 @@ -84,6 +87,7 @@ __m128d test_mm_and_pd(__m128d A, __m128d B) { // CHECK: and <2 x i64> return _mm_and_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_and_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0)); __m128i test_mm_and_si128(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_and_si128 @@ -97,6 +101,7 @@ __m128d test_mm_andnot_pd(__m128d A, __m128d B) { // CHECK: and <2 x i64> return _mm_andnot_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0)); __m128i test_mm_andnot_si128(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_andnot_si128 @@ -133,11 +138,13 @@ __m128 test_mm_castpd_ps(__m128d A) { // CHECK-LABEL: test_mm_castpd_ps return _mm_castpd_ps(A); } +TEST_CONSTEXPR(match_m128(_mm_castpd_ps((__m128d){-1.0, +2.0}), +0.0f, -1.875f, +0.0f, +2.0f)); __m128i test_mm_castpd_si128(__m128d A) { // CHECK-LABEL: test_mm_castpd_si128 return _mm_castpd_si128(A); } +TEST_CONSTEXPR(match_m128i(_mm_castpd_si128((__m128d){-1.0, +2.0}), 0xBFF0000000000000ULL, 0x4000000000000000ULL)); __m128d test_mm_castps_pd(__m128 A) { // CHECK-LABEL: test_mm_castps_pd @@ -499,12 +506,14 @@ __m128d test_mm_cvtepi32_pd(__m128i A) { // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double> return _mm_cvtepi32_pd(A); } +TEST_CONSTEXPR(match_m128d(_mm_cvtepi32_pd((__m128i)(__v4si){-9, +8, -6, 0}), -9.0, +8.0)); __m128 test_mm_cvtepi32_ps(__m128i A) { // CHECK-LABEL: test_mm_cvtepi32_ps // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float> return _mm_cvtepi32_ps(A); } 
+TEST_CONSTEXPR(match_m128(_mm_cvtepi32_ps((__m128i)(__v4si){-3, +2, -1, 0}), -3.0f, +2.0f, -1.0f, +0.0f)); __m128i test_mm_cvtpd_epi32(__m128d A) { // CHECK-LABEL: test_mm_cvtpd_epi32 @@ -530,12 +539,14 @@ __m128d test_mm_cvtps_pd(__m128 A) { // CHECK: fpext <2 x float> %{{.*}} to <2 x double> return _mm_cvtps_pd(A); } +TEST_CONSTEXPR(match_m128d(_mm_cvtps_pd((__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +2.0)); double test_mm_cvtsd_f64(__m128d A) { // CHECK-LABEL: test_mm_cvtsd_f64 // CHECK: extractelement <2 x double> %{{.*}}, i32 0 return _mm_cvtsd_f64(A); } +TEST_CONSTEXPR(_mm_cvtsd_f64((__m128d){-4.0, +8.0}) == -4.0); int test_mm_cvtsd_si32(__m128d A) { // CHECK-LABEL: test_mm_cvtsd_si32 @@ -575,6 +586,7 @@ __m128d test_mm_cvtsi32_sd(__m128d A, int B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_cvtsi32_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_cvtsi32_sd((__m128d){-99.0, +42.0}, 55), +55.0, +42.0)); __m128i test_mm_cvtsi32_si128(int A) { // CHECK-LABEL: test_mm_cvtsi32_si128 @@ -608,6 +620,7 @@ __m128d test_mm_cvtss_sd(__m128d A, __m128 B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_cvtss_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_cvtss_sd((__m128d){+32.0, +8.0}, (__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +8.0)); __m128i test_mm_cvttpd_epi32(__m128d A) { // CHECK-LABEL: test_mm_cvttpd_epi32 @@ -640,6 +653,7 @@ __m128d test_mm_div_pd(__m128d A, __m128d B) { // CHECK: fdiv <2 x double> return _mm_div_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_div_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +4.0)); __m128d test_mm_div_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_div_sd @@ -649,6 +663,7 @@ __m128d test_mm_div_sd(__m128d A, __m128d B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_div_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_div_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +8.0)); // Lowering to pextrw requires 
optimization. int test_mm_extract_epi16(__m128i A) { @@ -873,6 +888,7 @@ __m128d test_mm_move_sd(__m128d A, __m128d B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_move_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_move_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -4.0, +8.0)); int test_mm_movemask_epi8(__m128i A) { // CHECK-LABEL: test_mm_movemask_epi8 @@ -899,6 +915,7 @@ __m128d test_mm_mul_pd(__m128d A, __m128d B) { // CHECK: fmul <2 x double> %{{.*}}, %{{.*}} return _mm_mul_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_mul_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, +15.0)); __m128d test_mm_mul_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_mul_sd @@ -908,6 +925,7 @@ __m128d test_mm_mul_sd(__m128d A, __m128d B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_mul_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_mul_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, -3.0)); __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_mulhi_epi16 @@ -932,6 +950,7 @@ __m128d test_mm_or_pd(__m128d A, __m128d B) { // CHECK: or <2 x i64> %{{.*}}, %{{.*}} return _mm_or_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_or_pd((__m128d){+1.0, -3.0}, (__m128d){-0.0, +0.0}), -1.0, -3.0)); __m128i test_mm_or_si128(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_or_si128 @@ -1036,6 +1055,7 @@ __m128d test_mm_set_pd(double A, double B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 return _mm_set_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_set_pd(-9.0, +3.0), +3.0, -9.0)); __m128d test_mm_set_pd1(double A) { // CHECK-LABEL: test_mm_set_pd1 @@ -1043,6 +1063,7 @@ __m128d test_mm_set_pd1(double A) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 return _mm_set_pd1(A); } +TEST_CONSTEXPR(match_m128d(_mm_set_pd1(+5.0), +5.0, +5.0)); __m128d test_mm_set_sd(double A) { // CHECK-LABEL: test_mm_set_sd @@ -1050,6 +1071,7 @@ __m128d 
test_mm_set_sd(double A) { // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1 return _mm_set_sd(A); } +TEST_CONSTEXPR(match_m128d(_mm_set_sd(+1.0), +1.0, +0.0)); __m128i test_mm_set1_epi8(char A) { // CHECK-LABEL: test_mm_set1_epi8 @@ -1114,6 +1136,7 @@ __m128d test_mm_set1_pd(double A) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 return _mm_set1_pd(A); } +TEST_CONSTEXPR(match_m128d(_mm_set1_pd(-42.0), -42.0, -42.0)); __m128i test_mm_setr_epi8(char A, char B, char C, char D, char E, char F, char G, char H, @@ -1175,18 +1198,21 @@ __m128d test_mm_setr_pd(double A, double B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1 return _mm_setr_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_setr_pd(-9.0, +3.0), -9.0, +3.0)); __m128d test_mm_setzero_pd(void) { // CHECK-LABEL: test_mm_setzero_pd // CHECK: store <2 x double> zeroinitializer return _mm_setzero_pd(); } +TEST_CONSTEXPR(match_m128d(_mm_setzero_pd(), +0.0, +0.0)); __m128i test_mm_setzero_si128(void) { // CHECK-LABEL: test_mm_setzero_si128 // CHECK: store <2 x i64> zeroinitializer return _mm_setzero_si128(); } +TEST_CONSTEXPR(match_m128i(_mm_setzero_si128(), 0, 0)); __m128i test_mm_shuffle_epi32(__m128i A) { // CHECK-LABEL: test_mm_shuffle_epi32 @@ -1620,6 +1646,7 @@ __m128d test_mm_sub_pd(__m128d A, __m128d B) { // CHECK: fsub <2 x double> return _mm_sub_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_sub_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, +2.0)); __m128d test_mm_sub_sd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_sub_sd @@ -1629,6 +1656,7 @@ __m128d test_mm_sub_sd(__m128d A, __m128d B) { // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0 return _mm_sub_sd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_sub_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, -3.0)); __m128i test_mm_subs_epi8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_subs_epi8 @@ -1736,6 +1764,7 @@ __m128d test_mm_unpackhi_pd(__m128d A, __m128d 
B) { // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> return _mm_unpackhi_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_unpackhi_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +8.0, -2.0)); __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_unpacklo_epi8 @@ -1766,123 +1795,17 @@ __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) { // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> return _mm_unpacklo_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_unpacklo_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +2.0, -4.0)); __m128d test_mm_xor_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_xor_pd // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} return _mm_xor_pd(A, B); } +TEST_CONSTEXPR(match_m128d(_mm_xor_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +1.0, +3.0)); __m128i test_mm_xor_si128(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_xor_si128 // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} return _mm_xor_si128(A, B); } - -// Test constexpr handling. 
-#if defined(__cplusplus) && (__cplusplus >= 201103L) - -void test_constexpr() { - constexpr __m128d kd1 {+2.0,-1.0}; - constexpr __m128d kd2 {-4.0,-2.0}; - constexpr __m128d kd3 {-0.0,+0.0}; - - constexpr __m128 kf1 {-1.0f,+2.0f,-3.0f,+4.0f}; - - constexpr __m64 km1 {0x00000080FFFFFFF0ULL}; // -16,+128 - constexpr __m128i ki1 {0x00000010FFFFFFF8ULL, 0x00000001FFFFFFFFULL}; // -8,+16,-1,1 - - constexpr __m128d v_mm_set_sd = _mm_set_sd(1.0); - static_assert(v_mm_set_sd[0] == +1.0 && v_mm_set_sd[1] == +0.0); - - constexpr __m128d v_mm_set1_pd = _mm_set1_pd(2.0); - static_assert(v_mm_set1_pd[0] == +2.0 && v_mm_set1_pd[1] == +2.0); - - constexpr __m128d v_mm_set_pd1 = _mm_set_pd1(-2.0); - static_assert(v_mm_set_pd1[0] == -2.0 && v_mm_set_pd1[1] == -2.0); - - constexpr __m128d v_mm_set_pd = _mm_set_pd(+2.0, +3.0); - static_assert(v_mm_set_pd[0] == +3.0 && v_mm_set_pd[1] == +2.0); - - constexpr __m128d v_mm_setr_pd = _mm_setr_pd(+2.0, +3.0); - static_assert(v_mm_setr_pd[0] == +2.0 && v_mm_setr_pd[1] == +3.0); - - constexpr __m128d v_mm_setzero_pd = _mm_setzero_pd(); - static_assert(v_mm_setzero_pd[0] == +0.0 && v_mm_setzero_pd[1] == +0.0); - - constexpr __m128i v_mm_setzero_si128 = _mm_setzero_si128(); - static_assert(v_mm_setzero_si128[0] == 0x0000000000000000ULL && v_mm_setzero_si128[1] == 0x0000000000000000ULL); - - constexpr __m128d v_mm_add_sd = _mm_add_sd(kd1, kd2); - static_assert(v_mm_add_sd[0] == -2.0 && v_mm_add_sd[1] == -1.0); - - constexpr __m128d v_mm_add_pd = _mm_add_pd(kd1, kd2); - static_assert(v_mm_add_pd[0] == -2.0 && v_mm_add_pd[1] == -3.0); - - constexpr __m128d v_mm_sub_sd = _mm_sub_sd(kd1, kd2); - static_assert(v_mm_sub_sd[0] == +6.0 && v_mm_sub_sd[1] == -1.0); - - constexpr __m128d v_mm_sub_pd = _mm_sub_pd(kd1, kd2); - static_assert(v_mm_sub_pd[0] == +6.0 && v_mm_sub_pd[1] == +1.0); - - constexpr __m128d v_mm_mul_sd = _mm_mul_sd(kd1, kd2); - static_assert(v_mm_mul_sd[0] == -8.0 && v_mm_mul_sd[1] == -1.0); - - constexpr __m128d v_mm_mul_pd = 
_mm_mul_pd(kd1, kd2); - static_assert(v_mm_mul_pd[0] == -8.0 && v_mm_mul_pd[1] == +2.0); - - constexpr __m128d v_mm_div_sd = _mm_div_sd(kd1, kd2); - static_assert(v_mm_div_sd[0] == -0.5 && v_mm_div_sd[1] == -1.0); - - constexpr __m128d v_mm_div_pd = _mm_div_pd(kd1, kd2); - static_assert(v_mm_div_pd[0] == -0.5 && v_mm_div_pd[1] == +0.5); - - constexpr __m128d v_mm_and_pd = _mm_and_pd(kd1, kd3); - static_assert(v_mm_and_pd[0] == +0.0 && v_mm_and_pd[1] == +0.0); - - constexpr __m128d v_mm_andnot_pd = _mm_andnot_pd(kd1, kd3); - static_assert(v_mm_andnot_pd[0] == -0.0 && v_mm_andnot_pd[1] == +0.0); - - constexpr __m128d v_mm_or_pd = _mm_or_pd(kd1, kd3); - static_assert(v_mm_or_pd[0] == -2.0 && v_mm_or_pd[1] == -1.0); - - constexpr __m128d v_mm_xor_pd = _mm_xor_pd(kd2, kd3); - static_assert(v_mm_xor_pd[0] == +4.0 && v_mm_xor_pd[1] == -2.0); - - constexpr __m128d v_mm_cvtps_pd = _mm_cvtps_pd(kf1); - static_assert(v_mm_cvtps_pd[0] == -1.0 && v_mm_cvtps_pd[1] == +2.0); - - constexpr __m128d v_mm_cvtepi32_pd = _mm_cvtepi32_pd(ki1); - static_assert(v_mm_cvtepi32_pd[0] == -8.0 && v_mm_cvtepi32_pd[1] == +16.0); - - constexpr __m128 v_mm_cvtepi32_ps = _mm_cvtepi32_ps(ki1); - static_assert(v_mm_cvtepi32_ps[0] == -8.0f && v_mm_cvtepi32_ps[1] == +16.0f && v_mm_cvtepi32_ps[2] == -1.0f && v_mm_cvtepi32_ps[3] == +1.0f); - - constexpr __m128d v_mm_cvtsi32_sd = _mm_cvtsi32_sd(kd1, 8); - static_assert(v_mm_cvtsi32_sd[0] == +8.0 && v_mm_cvtsi32_sd[1] == -1.0); - - constexpr __m128d v_mm_cvtss_sd = _mm_cvtss_sd(kd2, kf1); - static_assert(v_mm_cvtss_sd[0] == -1.0 && v_mm_cvtss_sd[1] == -2.0); - - constexpr __m128d v_mm_cvtpi32_pd = _mm_cvtpi32_pd(km1); - static_assert(v_mm_cvtpi32_pd[0] == -16.0 && v_mm_cvtpi32_pd[1] == 128.0); - - static_assert(_mm_cvtsd_f64(kd2) == -4.0); - - constexpr __m128d v_mm_move_sd = _mm_move_sd(kd1, kd2); - static_assert(v_mm_move_sd[0] == -4.0 && v_mm_move_sd[1] == -1.0); - - constexpr __m128d v_mm_unpackhi_pd = _mm_unpackhi_pd(kd1, kd2); - 
static_assert(v_mm_unpackhi_pd[0] == -1.0f && v_mm_unpackhi_pd[1] == -2.0f); - - constexpr __m128d v_mm_unpacklo_pd = _mm_unpacklo_pd(kd1, kd2); - static_assert(v_mm_unpacklo_pd[0] == +2.0f && v_mm_unpacklo_pd[1] == -4.0f); - - constexpr __m128 v_mm_castpd_ps = _mm_castpd_ps(kd3); - static_assert(v_mm_castpd_ps[0] == -0.0f && v_mm_castpd_ps[1] == +0.0f && v_mm_castpd_ps[2] == +0.0f && v_mm_castpd_ps[3] == +0.0f); - - constexpr __m128i v_mm_castpd_si128 = _mm_castpd_si128(kd3); - static_assert(v_mm_castpd_si128[0] == 0x8000000000000000ULL && v_mm_castpd_si128[1] == 0x0000000000000000ULL); -} - -#endif diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c index 18c062f4c14a7db..d47c19b882cd1ef 100644 --- a/clang/test/CodeGen/X86/sse3-builtins.c +++ b/clang/test/CodeGen/X86/sse3-builtins.c @@ -5,6 +5,7 @@ #include +#include "builtin_test_helpers.h" // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll @@ -63,34 +64,18 @@ __m128d test_mm_movedup_pd(__m128d A) { // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer return _mm_movedup_pd(A); } +TEST_CONSTEXPR(match_m128d(_mm_movedup_pd((__m128d){+7.0, -7.0}), +7.0, +7.0)); __m128 test_mm_movehdup_ps(__m128 A) { // CHECK-LABEL: test_mm_movehdup_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_movehdup_ps(A); } +TEST_CONSTEXPR(match_m128(_mm_movehdup_ps((__m128){+1.0f,-1.0f,+2.0f,+4.0f}), -1.0f, -1.0f, +4.0f, +4.0f)); __m128 test_mm_moveldup_ps(__m128 A) { // CHECK-LABEL: test_mm_moveldup_ps // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> return _mm_moveldup_ps(A); } - -// Test constexpr handling. 
-#if defined(__cplusplus) && (__cplusplus >= 201103L) - -void test_constexpr() { - constexpr __m128d kd1 {+7.0,-7.0}; - constexpr __m128 kf1 {+1.0f,-1.0f,+2.0f,+4.0f}; - - constexpr __m128d v_mm_movedup_pd = _mm_movedup_pd(kd1); - static_assert(v_mm_movedup_pd[0] == +7.0 && v_mm_movedup_pd[1] == +7.0); - - constexpr __m128 v_mm_movehdup_ps = _mm_movehdup_ps(kf1); - static_assert(v_mm_movehdup_ps[0] == -1.0f && v_mm_movehdup_ps[1] == -1.0f && v_mm_movehdup_ps[2] == +4.0f && v_mm_movehdup_ps[3] == +4.0f); - - constexpr __m128 v_mm_moveldup_ps = _mm_moveldup_ps(kf1); - static_assert(v_mm_moveldup_ps[0] == +1.0f && v_mm_moveldup_ps[1] == +1.0f && v_mm_moveldup_ps[2] == +2.0f && v_mm_moveldup_ps[3] == +2.0f); -} - -#endif +TEST_CONSTEXPR(match_m128(_mm_moveldup_ps((__m128){+1.0f,-1.0f,+2.0f,+4.0f}), +1.0f, +1.0f, +2.0f, +2.0f)); From 98c8d643539194321f3dba8698e95999165b1024 Mon Sep 17 00:00:00 2001 From: Lukacma Date: Tue, 29 Oct 2024 11:08:36 +0000 Subject: [PATCH 270/425] [AArch64] Add assembly/dissasembly for BFSCALE instructions (#113538) This patch adds assembly/disassembly for following instructions: BFSCALE (multiple and single vector) BFSCALE (multiple vectors) As specified in https://developer.arm.com/documentation/ddi0602/2024-09 Co-authored-by: Momchil Velikov [momchil.velikov@arm.com](mailto:momchil.velikov@arm.com) --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 5 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 12 ++- .../MC/AArch64/SME2/bfscale-diagnostics.s | 87 +++++++++++++++++ llvm/test/MC/AArch64/SME2/bfscale.s | 93 +++++++++++++++++++ 4 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/AArch64/SME2/bfscale-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2/bfscale.s diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 76e0501a5cc233e..b0ce9e1a1b406fc 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ 
b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1000,6 +1000,11 @@ defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_fr } //[HasSMEF8F32] +let Predicates = [HasSME2, HasSVEBFSCALE] in { + defm BFSCALE : sme2_bfscale_single<"bfscale">; + defm BFSCALE : sme2_bfscale_multi<"bfscale">; +} + let Predicates = [HasSME2p2] in { def FTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">; def FTMOPA_M2ZZZI_StoS : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index a05c5206320f714..62a0c2e83d4a2ea 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5189,6 +5189,16 @@ class sme2_luti4_vector_vg4_strided sz, bits<2> op, string mnemonic> let Inst{1-0} = Zd{1-0}; } +multiclass sme2_bfscale_single { + def _2ZZ : sme2_sve_destructive_vector_vg2_single<0b00, 0b0011000, ZZ_h_mul_r, ZPR4b16, mnemonic>; + def _4ZZ : sme2_sve_destructive_vector_vg4_single<0b00, 0b0011000, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; +} + +multiclass sme2_bfscale_multi { + def _2Z2Z : sme2_sve_destructive_vector_vg2_multi<0b00, 0b0011000, ZZ_h_mul_r, mnemonic>; + def _4Z4Z : sme2_sve_destructive_vector_vg4_multi<0b00, 0b0011000, ZZZZ_h_mul_r, mnemonic>; +} + class sme2_bf16_fp32_quarter_tile_outer_product : I<(outs TileOp32:$ZAda), (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), @@ -5334,4 +5344,4 @@ multiclass sme2_multi4_fmul_mm { multiclass sme2_bfmul_multi { def _2Z2Z : sme2_multi2_fmul_mm<0b00, mnemonic, ZZ_h_mul_r>; def _4Z4Z : sme2_multi4_fmul_mm<0b00, mnemonic, ZZZZ_h_mul_r>; -} +} \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SME2/bfscale-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfscale-diagnostics.s new file mode 100644 index 000000000000000..63367eed65b6c92 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bfscale-diagnostics.s @@ -0,0 +1,87 @@ +// RUN: not llvm-mc -triple=aarch64 
-show-encoding -mattr=+sme2,+sve-bfscale 2>&1 < %s| FileCheck %s + +// Multiple and single vector, 2 regs + +bfscale {z0.s-z1.s}, {z0.s-z1.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfscale {z1.h-z2.h}, {z1.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +bfscale {z0.h-z2.h}, {z0.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfscale {z0.h-z1.h}, {z0.h-z1.h}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +bfscale {z0.h-z1.h}, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +bfscale {z0.h-z1.h}, {z2.h-z3.h}, z8.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register list + +// Multiple and single vector, 4 regs + +bfscale {z0.s-z3.s}, {z0.s-z3.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfscale {z1.h-z4.h}, {z1.h-z4.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types + +bfscale {z0.h-z4.h}, {z0.h-z4.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +bfscale {z0.h-z3.h}, {z0.h-z3.h}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +bfscale {z0.h-z3.h}, {z0.h-z3.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h + +bfscale {z0.h-z3.h}, {z4.h-z7.h}, z8.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register list + +// Multiple vectors, 2 regs + +bfscale {z0.s-z1.s}, {z0.s-z1.s}, {z2.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: invalid operand for instruction + +bfscale {z1.h-z2.h}, {z1.h-z2.h}, {z2.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +bfscale {z0.h-z2.h}, {z0.h-z4.h}, {z2.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +bfscale {z0.h-z1.h}, {z0.h-z1.h}, {z2.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfscale {z0.h-z1.h}, {z0.h-z1.h}, {z28.h-z30.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfscale {z0.h-z1.h}, {z0.h-z1.h}, {z29.h-z30.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types + +bfscale {z0.h-z1.h}, {z2.h-z3.h}, {z28.h-z29.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register list + +// Multiple vectors, 4 regs + +bfscale {z0.s-z3.s}, {z0.s-z3.s}, {z4.h-z7.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfscale {z1.h-z4.h}, {z1.h-z4.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types + +bfscale {z0.h-z4.h}, {z0.h-z4.h}, {z4.h-z7.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +bfscale {z0.h-z3.h}, {z0.h-z3.h}, {z4.s-z7.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfscale {z0.h-z3.h}, {z0.h-z3.h}, {z4.h-z8.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors + +bfscale {z0.h-z3.h}, {z0.h-z3.h}, {z5.h-z8.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types + +bfscale {z0.h-z3.h}, 
{z4.h-z7.h}, {z8.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register list \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SME2/bfscale.s b/llvm/test/MC/AArch64/SME2/bfscale.s new file mode 100644 index 000000000000000..719d013b235c053 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bfscale.s @@ -0,0 +1,93 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sve-bfscale < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sve-bfscale - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sve-bfscale < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sve-bfscale -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// Multiple and single vector, 2 regs + +bfscale {z0.h-z1.h}, {z0.h-z1.h}, z0.h // 11000001-00100000-10100001-10000000 +// CHECK-INST: bfscale { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x80,0xa1,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c120a180 + +bfscale {z20.h-z21.h}, {z20.h-z21.h}, z5.h // 11000001-00100101-10100001-10010100 +// CHECK-INST: bfscale { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x94,0xa1,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c125a194 + +bfscale {z30.h-z31.h}, {z30.h-z31.h}, z15.h // 11000001-00101111-10100001-10011110 +// CHECK-INST: bfscale { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x9e,0xa1,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c12fa19e + +// Multiple and single vector, 4 regs + +bfscale {z0.h-z3.h}, {z0.h-z3.h}, z0.h // 11000001-00100000-10101001-10000000 +// CHECK-INST: bfscale { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x80,0xa9,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c120a980 + +bfscale {z20.h-z23.h}, {z20.h-z23.h}, z5.h // 11000001-00100101-10101001-10010100 +// CHECK-INST: bfscale { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x94,0xa9,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c125a994 + +bfscale {z28.h-z31.h}, {z28.h-z31.h}, z15.h // 11000001-00101111-10101001-10011100 +// CHECK-INST: bfscale { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x9c,0xa9,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale 
+// CHECK-UNKNOWN: c12fa99c + +// Multiple vectors, 2 regs + +bfscale {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h} // 11000001-00100000-10110001-10000000 +// CHECK-INST: bfscale { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x80,0xb1,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c120b180 + +bfscale {z20.h-z21.h}, {z20.h-z21.h}, {z20.h-z21.h} // 11000001-00110100-10110001-10010100 +// CHECK-INST: bfscale { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x94,0xb1,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c134b194 + +bfscale {z30.h-z31.h}, {z30.h-z31.h}, {z30.h-z31.h} // 11000001-00111110-10110001-10011110 +// CHECK-INST: bfscale { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x9e,0xb1,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c13eb19e + +// Multiple vectors, 4 regs + +bfscale {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h} // 11000001-00100000-10111001-10000000 +// CHECK-INST: bfscale { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0xb9,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c120b980 + +bfscale {z20.h-z23.h}, {z20.h-z23.h}, {z20.h-z23.h} // 11000001-00110100-10111001-10010100 +// CHECK-INST: bfscale { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x94,0xb9,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c134b994 + +bfscale {z28.h-z31.h}, {z28.h-z31.h}, {z28.h-z31.h} // 11000001-00111100-10111001-10011100 +// CHECK-INST: bfscale { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9c,0xb9,0x3c,0xc1] +// CHECK-ERROR: instruction requires: sme2 sve-bfscale +// CHECK-UNKNOWN: c13cb99c From 06664fdc7680f7f9fa9b0a414a8fb8df2f913d48 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 29 Oct 2024 11:41:25 +0000 Subject: [PATCH 
271/425] [FuncSpec] Enable SpecializeLiteralConstant by default (#113442) Enable specialization on literal constant arguments by default in Function Specialization. --------- Co-authored-by: Alexandros Lamprineas --- .../Transforms/IPO/FunctionSpecialization.h | 7 + .../Transforms/IPO/FunctionSpecialization.cpp | 20 ++- .../Generic/ipsccp-remap-assign-id.ll | 4 +- .../compiler-crash-58759.ll | 2 +- ...tion-specialization-constant-expression.ll | 2 +- .../function-specialization2.ll | 6 +- .../function-specialization4.ll | 6 +- .../get-possible-constants.ll | 2 +- .../FunctionSpecialization/global-rank.ll | 2 +- .../identical-specializations.ll | 2 +- .../FunctionSpecialization/literal-const.ll | 2 +- .../specialize-multiple-arguments.ll | 8 +- .../track-ptr-return.ll | 147 ++++++++++++++++++ 13 files changed, 182 insertions(+), 28 deletions(-) create mode 100644 llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index 5920dde9d77dfd9..f20b01c186306d9 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -64,6 +64,13 @@ // - Perhaps a post-inlining function specialization pass could be more // aggressive on literal constants. // +// Limitations: +// ------------ +// - We are unable to consider specializations of functions called from indirect +// callsites whose pointer operand has a lattice value that is known to be +// constant, either from IPSCCP or previous iterations of FuncSpec. This is +// because SCCP has not yet replaced the uses of the known constant. 
+// // References: // ----------- // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 48971e9a46355c4..8e6993d35d49917 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -84,14 +84,11 @@ static cl::opt SpecializeOnAddress( "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc( "Enable function specialization on the address of global values")); -// Disabled by default as it can significantly increase compilation times. -// -// https://llvm-compile-time-tracker.com -// https://github.com/nikic/llvm-compile-time-tracker static cl::opt SpecializeLiteralConstant( - "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc( - "Enable specialization of functions that take a literal constant as an " - "argument")); + "funcspec-for-literal-constant", cl::init(true), cl::Hidden, + cl::desc( + "Enable specialization of functions that take a literal constant as an " + "argument")); bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ, DenseSet &DeadBlocks) { @@ -682,10 +679,11 @@ bool FunctionSpecializer::run() { (RequireMinSize && Metrics.NumInsts < MinFunctionSize)) continue; - // TODO: For now only consider recursive functions when running multiple - // times. This should change if specialization on literal constants gets - // enabled. - if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant) + // When specialization on literal constants is disabled, only consider + // recursive functions when running multiple times to save wasted analysis, + // as we will not be able to specialize on any newly found literal constant + // return values. 
+ if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive) continue; int64_t Sz = *Metrics.NumInsts.getValue(); diff --git a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll index 0e8f92cacf66d73..42560fc3958d1b6 100644 --- a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll +++ b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes=ipsccp %s -S -o - | FileCheck %s -; RUN: opt --try-experimental-debuginfo-iterators -passes=ipsccp %s -S -o - | FileCheck %s +; RUN: opt -passes=ipsccp -funcspec-for-literal-constant=false %s -S -o - | FileCheck %s +; RUN: opt --try-experimental-debuginfo-iterators -passes=ipsccp -funcspec-for-literal-constant=false %s -S -o - | FileCheck %s ;; Check the dbg.assign DIAssignID operand gets remapped after cloning. diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll index f29cf0d123939a9..7291d83b816115a 100644 --- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll +++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="default" < %s | FileCheck %s +; RUN: opt -S --passes="default" -funcspec-for-literal-constant=false < %s | FileCheck %s define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll index 16a468511631293..0c24169d02c2c56 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll @@ -4,7 +4,7 @@ ; Note that this test case shows that function specialization pass would ; transform the function even if 
no specialization happened. -; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -funcspec-for-literal-constant=false -S < %s | FileCheck %s %struct = type { i8, i16, i32, i64, i64} @Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4} diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll index ef830a0e9a4a9e7..6f36a394979d81b 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll +++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 -; RUN: opt -passes="ipsccp,deadargelim" -force-specialization -S < %s | FileCheck %s -; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE-ITER -; RUN: opt -passes="ipsccp,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-for-literal-constant=false -force-specialization -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-for-literal-constant=false -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE-ITER +; RUN: opt -passes="ipsccp,deadargelim" -funcspec-for-literal-constant=false -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) { diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll index 4e5a196d6682912..a6a990c34159363 100644 --- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll +++ 
b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll @@ -1,8 +1,10 @@ ; RUN: opt -passes="ipsccp" -force-specialization \ -; RUN: -funcspec-max-clones=2 -S < %s | FileCheck %s +; RUN: -funcspec-for-literal-constant=false -funcspec-max-clones=2 \ +; RUN: -S < %s | FileCheck %s ; RUN: opt -passes="ipsccp" -force-specialization \ -; RUN: -funcspec-max-clones=1 -S < %s | FileCheck %s --check-prefix=CONST1 +; RUN: -funcspec-for-literal-constant=false -funcspec-max-clones=1 \ +; RUN: -S < %s | FileCheck %s --check-prefix=CONST1 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll index dfa1e5a42776a5f..2f42125d8cf9799 100644 --- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll +++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="ipsccp" < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -funcspec-for-literal-constant=false < %s | FileCheck %s define dso_local i32 @p0(i32 noundef %x) { entry: %add = add nsw i32 %x, 1 diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll index 1926e29ddee0136..06185332f22e0cd 100644 --- a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll +++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll @@ -1,4 +1,4 @@ -; RUN: opt -S --passes="ipsccp" -funcspec-max-clones=1 < %s | FileCheck %s +; RUN: opt -S --passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=1 < %s | FileCheck %s define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline { entry: %call = tail call i32 %p(i32 noundef %x) diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll 
b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll index 930ed6627f7f1e6..97d77971a92d3b1 100644 --- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll +++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 -; RUN: opt -passes="ipsccp" -force-specialization -S < %s | FileCheck %s +; RUN: opt -passes="ipsccp" -force-specialization -funcspec-for-literal-constant=false -S < %s | FileCheck %s define i64 @main(i64 %x, i64 %y, i1 %flag) { entry: diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll index 3eae3dc261fb2ac..7d5e506064af778 100644 --- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll +++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll @@ -1,7 +1,7 @@ ; RUN: opt -S --passes="ipsccp" \ +; RUN: -funcspec-for-literal-constant=false \ ; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT ; RUN: opt -S --passes="ipsccp" \ -; RUN: -funcspec-for-literal-constant \ ; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT define i32 @f0(i32 noundef %x) { diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll index a653760abb2cc65..73291600edb85de 100644 --- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll +++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes="ipsccp" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE -; RUN: opt -passes="ipsccp" -funcspec-max-clones=1 
-funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE -; RUN: opt -passes="ipsccp" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO -; RUN: opt -passes="ipsccp" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO +; RUN: opt -passes="ipsccp" -funcspec-for-literal-constant=false -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE ; Make sure that we iterate correctly after sorting the specializations: ; FnSpecialization: Specializations for function compute diff --git a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll new file mode 100644 index 000000000000000..f4ba0e72a1b4397 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; RUN: opt -passes="ipsccp" -force-specialization \ +; RUN: -funcspec-max-iters=3 -S < %s | FileCheck %s + +; RUN: opt -passes="ipsccp" -force-specialization \ +; RUN: -funcspec-for-literal-constant=false -funcspec-max-iters=3 \ +; RUN: -S < %s | FileCheck %s --check-prefix=NOLIT + +@global_true = constant i1 true +@global_false = constant i1 false + +define i64 @main() { +entry: + %op1 = call ptr @select_op(ptr @global_true) + %op2 = call ptr @select_op(ptr 
@global_false) + + %c1 = call i64 @compute(ptr %op1) + %c2 = call i64 @compute(ptr %op2) + %add = add i64 %c1, %c2 + ret i64 %add +} + +define ptr @select_op(ptr %flag) { + %flag.val = load i1, ptr %flag + %op = select i1 %flag.val, ptr @plus, ptr @minus + ret ptr %op +} + +define internal i64 @compute(ptr %op) { +entry: + %res = call i64 %op(i64 1) + ret i64 %res +} + +define internal i64 @plus(i64 %x) { +entry: + %sum = add i64 %x, 1 + ret i64 %sum +} + +define internal i64 @minus(i64 %x) { +entry: + %diff = sub i64 %x, 1 + ret i64 %diff +} +; CHECK-LABEL: define i64 @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true) +; CHECK-NEXT: [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false) +; CHECK-NEXT: [[C1:%.*]] = call i64 @compute.specialized.3(ptr @plus) +; CHECK-NEXT: [[C2:%.*]] = call i64 @compute.specialized.4(ptr @minus) +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[C1]], [[C2]] +; CHECK-NEXT: ret i64 [[ADD]] +; +; +; CHECK-LABEL: define ptr @select_op( +; CHECK-SAME: ptr [[FLAG:%.*]]) { +; CHECK-NEXT: [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1 +; CHECK-NEXT: [[OP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus +; CHECK-NEXT: ret ptr [[OP]] +; +; +; CHECK-LABEL: define internal i64 @plus( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SUM:%.*]] = add i64 [[X]], 1 +; CHECK-NEXT: ret i64 [[SUM]] +; +; +; CHECK-LABEL: define internal i64 @minus( +; CHECK-SAME: i64 [[X:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[X]], 1 +; CHECK-NEXT: ret i64 [[DIFF]] +; +; +; CHECK-LABEL: define internal ptr @select_op.specialized.1( +; CHECK-SAME: ptr [[FLAG:%.*]]) { +; CHECK-NEXT: ret ptr poison +; +; +; CHECK-LABEL: define internal ptr @select_op.specialized.2( +; CHECK-SAME: ptr [[FLAG:%.*]]) { +; CHECK-NEXT: ret ptr poison +; +; +; CHECK-LABEL: define internal i64 @compute.specialized.3( +; CHECK-SAME: ptr [[OP:%.*]]) { +; 
CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @plus(i64 1) +; CHECK-NEXT: ret i64 [[RES]] +; +; +; CHECK-LABEL: define internal i64 @compute.specialized.4( +; CHECK-SAME: ptr [[OP:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[RES:%.*]] = call i64 @minus(i64 1) +; CHECK-NEXT: ret i64 [[RES]] +; +; +; NOLIT-LABEL: define i64 @main() { +; NOLIT-NEXT: [[ENTRY:.*:]] +; NOLIT-NEXT: [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true) +; NOLIT-NEXT: [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false) +; NOLIT-NEXT: [[C1:%.*]] = call i64 @compute(ptr @plus) +; NOLIT-NEXT: [[C2:%.*]] = call i64 @compute(ptr @minus) +; NOLIT-NEXT: [[ADD:%.*]] = add i64 [[C1]], [[C2]] +; NOLIT-NEXT: ret i64 [[ADD]] +; +; +; NOLIT-LABEL: define ptr @select_op( +; NOLIT-SAME: ptr [[FLAG:%.*]]) { +; NOLIT-NEXT: [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1 +; NOLIT-NEXT: [[OP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus +; NOLIT-NEXT: ret ptr [[OP]] +; +; +; NOLIT-LABEL: define internal i64 @compute( +; NOLIT-SAME: ptr [[OP:%.*]]) { +; NOLIT-NEXT: [[ENTRY:.*:]] +; NOLIT-NEXT: [[RES:%.*]] = call i64 [[OP]](i64 1) +; NOLIT-NEXT: ret i64 [[RES]] +; +; +; NOLIT-LABEL: define internal i64 @plus( +; NOLIT-SAME: i64 [[X:%.*]]) { +; NOLIT-NEXT: [[ENTRY:.*:]] +; NOLIT-NEXT: [[SUM:%.*]] = add i64 [[X]], 1 +; NOLIT-NEXT: ret i64 [[SUM]] +; +; +; NOLIT-LABEL: define internal i64 @minus( +; NOLIT-SAME: i64 [[X:%.*]]) { +; NOLIT-NEXT: [[ENTRY:.*:]] +; NOLIT-NEXT: [[DIFF:%.*]] = sub i64 [[X]], 1 +; NOLIT-NEXT: ret i64 [[DIFF]] +; +; +; NOLIT-LABEL: define internal ptr @select_op.specialized.1( +; NOLIT-SAME: ptr [[FLAG:%.*]]) { +; NOLIT-NEXT: ret ptr poison +; +; +; NOLIT-LABEL: define internal ptr @select_op.specialized.2( +; NOLIT-SAME: ptr [[FLAG:%.*]]) { +; NOLIT-NEXT: ret ptr poison +; From 46944d1f950d042695197038ab3f1bf25ace261b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 29 Oct 2024 06:43:40 -0500 Subject: [PATCH 
272/425] [flang][OpenMP] Extract OMP version hint into helper functions, NFC (#113621) --- flang/lib/Semantics/check-omp-structure.cpp | 24 +++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 599cc61a83bf0ae..0c3a0e76df6a6af 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -38,6 +38,16 @@ namespace Fortran::semantics { CheckAllowedClause(llvm::omp::Y); \ } +std::string ThisVersion(unsigned version) { + std::string tv{ + std::to_string(version / 10) + "." + std::to_string(version % 10)}; + return "OpenMP v" + tv; +} + +std::string TryVersion(unsigned version) { + return "try -fopenmp-version=" + std::to_string(version); +} + // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment // statements and the expressions enclosed in an OpenMP Workshare construct class OmpWorkshareBlockChecker { @@ -200,14 +210,10 @@ bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) { auto clauseName{parser::ToUpperCaseLetters(getClauseName(clause).str())}; auto dirName{parser::ToUpperCaseLetters(getDirectiveName(dir).str())}; - std::string thisVersion{ - std::to_string(version / 10) + "." + std::to_string(version % 10)}; - std::string goodVersion{std::to_string(allowedInVersion)}; - context_.Say(dirCtx.clauseSource, - "%s clause is not allowed on directive %s in OpenMP v%s, " - "try -fopenmp-version=%d"_err_en_US, - clauseName, dirName, thisVersion, allowedInVersion); + "%s clause is not allowed on directive %s in %s, %s"_err_en_US, + clauseName, dirName, ThisVersion(version), + TryVersion(allowedInVersion)); } } return CheckAllowed(clause); @@ -3378,8 +3384,8 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Lastprivate &x) { std::to_string(version / 10) + "." 
+ std::to_string(version % 10)}; context_.Say(GetContext().clauseSource, "LASTPRIVATE clause with CONDITIONAL modifier is not " - "allowed in OpenMP v%s, try -fopenmp-version=%d"_err_en_US, - thisVersion, allowedInVersion); + "allowed in %s, %s"_err_en_US, + ThisVersion(version), TryVersion(allowedInVersion)); } } } From 2443549b853908352a3b7b9bd6c07616492814a1 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 29 Oct 2024 11:44:53 +0000 Subject: [PATCH 273/425] [IR] Remove some uses of StructType::setBody. NFC. (#113685) It is simple to create the struct body up front, now that we have transitioned to opaque pointers. --- llvm/lib/CodeGen/ShadowStackGCLowering.cpp | 7 +++---- llvm/lib/Target/X86/X86WinEHState.cpp | 8 +++---- llvm/lib/Transforms/Coroutines/CoroEarly.cpp | 4 ++-- llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 22 +++++++++++--------- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 232e5e2bb886dfd..f8ab44124b3ae8e 100644 --- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -242,13 +242,12 @@ bool ShadowStackGCLoweringImpl::doInitialization(Module &M) { // void *Roots[]; // Stack roots (in-place array, so we pretend). // }; - StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); + PointerType *StackEntryPtrTy = PointerType::getUnqual(M.getContext()); EltTys.clear(); - EltTys.push_back(PointerType::getUnqual(StackEntryTy)); + EltTys.push_back(StackEntryPtrTy); EltTys.push_back(FrameMapPtrTy); - StackEntryTy->setBody(EltTys); - PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + StackEntryTy = StructType::create(EltTys, "gc_stackentry"); // Get the root chain if it already exists. 
Head = M.getGlobalVariable("llvm_gc_root_chain"); diff --git a/llvm/lib/Target/X86/X86WinEHState.cpp b/llvm/lib/Target/X86/X86WinEHState.cpp index bc9fd801f94b224..ef2127367301143 100644 --- a/llvm/lib/Target/X86/X86WinEHState.cpp +++ b/llvm/lib/Target/X86/X86WinEHState.cpp @@ -210,13 +210,11 @@ Type *WinEHStatePass::getEHLinkRegistrationType() { if (EHLinkRegistrationTy) return EHLinkRegistrationTy; LLVMContext &Context = TheModule->getContext(); - EHLinkRegistrationTy = StructType::create(Context, "EHRegistrationNode"); Type *FieldTys[] = { - PointerType::getUnqual( - EHLinkRegistrationTy->getContext()), // EHRegistrationNode *Next - PointerType::getUnqual(Context) // EXCEPTION_DISPOSITION (*Handler)(...) + PointerType::getUnqual(Context), // EHRegistrationNode *Next + PointerType::getUnqual(Context) // EXCEPTION_DISPOSITION (*Handler)(...) }; - EHLinkRegistrationTy->setBody(FieldTys, false); + EHLinkRegistrationTy = StructType::create(FieldTys, "EHRegistrationNode"); return EHLinkRegistrationTy; } diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index a3674306f3e10e6..5375448d2d2e2b8 100644 --- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -123,11 +123,11 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) { Module &M = *II->getModule(); // Create a noop.frame struct type. - StructType *FrameTy = StructType::create(C, "NoopCoro.Frame"); auto *FnTy = FunctionType::get(Type::getVoidTy(C), Builder.getPtrTy(0), /*isVarArg=*/false); auto *FnPtrTy = Builder.getPtrTy(0); - FrameTy->setBody({FnPtrTy, FnPtrTy}); + StructType *FrameTy = + StructType::create({FnPtrTy, FnPtrTy}, "NoopCoro.Frame"); // Create a Noop function that does nothing. 
Function *NoopFn = diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 021fcc20c1f18bb..bb6126026d90581 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -290,8 +290,8 @@ class FrameTypeBuilder { return Fields.size() - 1; } - /// Finish the layout and set the body on the given type. - void finish(StructType *Ty); + /// Finish the layout and create the struct type with the given name. + StructType *finish(StringRef Name); uint64_t getStructSize() const { assert(IsFinished && "not yet finished!"); @@ -464,7 +464,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F, }); } -void FrameTypeBuilder::finish(StructType *Ty) { +StructType *FrameTypeBuilder::finish(StringRef Name) { assert(!IsFinished && "already finished!"); // Prepare the optimal-layout field array. @@ -526,7 +526,7 @@ void FrameTypeBuilder::finish(StructType *Ty) { LastOffset = Offset + F.Size; } - Ty->setBody(FieldTypes, Packed); + StructType *Ty = StructType::create(Context, FieldTypes, Name, Packed); #ifndef NDEBUG // Check that the IR layout matches the offsets we expect. @@ -538,6 +538,8 @@ void FrameTypeBuilder::finish(StructType *Ty) { #endif IsFinished = true; + + return Ty; } static void cacheDIVar(FrameDataInfo &FrameData, @@ -866,11 +868,6 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, bool OptimizeFrame) { LLVMContext &C = F.getContext(); const DataLayout &DL = F.getDataLayout(); - StructType *FrameTy = [&] { - SmallString<32> Name(F.getName()); - Name.append(".Frame"); - return StructType::create(C, Name); - }(); // We will use this value to cap the alignment of spilled values. 
std::optional MaxFrameAlignment; @@ -931,7 +928,12 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape, FrameData.setFieldIndex(S.first, Id); } - B.finish(FrameTy); + StructType *FrameTy = [&] { + SmallString<32> Name(F.getName()); + Name.append(".Frame"); + return B.finish(Name); + }(); + FrameData.updateLayoutIndex(B); Shape.FrameAlign = B.getStructAlign(); Shape.FrameSize = B.getStructSize(); From ec427df2b9c04cb3323babcf680dad8dcefaf228 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 29 Oct 2024 11:50:29 +0000 Subject: [PATCH 274/425] [AArch64] Add assembly/disassembly for FMOP4{A,S} (non-widening) half-precision instructions (#113343) The new instructions are described in https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 2 + .../AArch64/AsmParser/AArch64AsmParser.cpp | 3 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 39 +++- .../fmop4as-fp16-non-widening-diagnostics.s | 220 ++++++++++++++++++ .../SME2p2/fmop4as-fp16-non-widening.s | 179 ++++++++++++++ 5 files changed, 442 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index b0ce9e1a1b406fc..ae40911cc62a877 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1033,6 +1033,8 @@ let Predicates = [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR] in { let Predicates = [HasSME2p2, HasSMEF16F16] in { def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">; + defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a">; + defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s">; } // [HasSME2p2, HasSMEF16F16] let Predicates = [HasSME2, HasSVEBFSCALE] in { diff --git 
a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 8e267e65862210f..5a487be5723ce92 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -6262,6 +6262,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, case Match_InvalidMatrixTileVectorV128: return Error(Loc, "invalid matrix operand, expected za[0-15]h.q or za[0-15]v.q"); + case Match_InvalidMatrixTile16: + return Error(Loc, "invalid matrix operand, expected za[0-1].h"); case Match_InvalidMatrixTile32: return Error(Loc, "invalid matrix operand, expected za[0-3].s"); case Match_InvalidMatrixTile64: @@ -6882,6 +6884,7 @@ bool AArch64AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidSVEExactFPImmOperandHalfOne: case Match_InvalidSVEExactFPImmOperandHalfTwo: case Match_InvalidSVEExactFPImmOperandZeroOne: + case Match_InvalidMatrixTile16: case Match_InvalidMatrixTile32: case Match_InvalidMatrixTile64: case Match_InvalidMatrix: diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 62a0c2e83d4a2ea..330c540ffde4432 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5344,4 +5344,41 @@ multiclass sme2_multi4_fmul_mm { multiclass sme2_bfmul_multi { def _2Z2Z : sme2_multi2_fmul_mm<0b00, mnemonic, ZZ_h_mul_r>; def _4Z4Z : sme2_multi4_fmul_mm<0b00, mnemonic, ZZZZ_h_mul_r>; -} \ No newline at end of file +} + +class sme2_fp16_quarter_tile_outer_product + : I<(outs TileOp16:$ZAda), + (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bit ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000001000; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let 
Inst{3-1} = 0b100; + let Inst{0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmop4as_fp16_non_widening { + // Single vectors + def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; +} diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s new file mode 100644 index 000000000000000..0272721e083621d --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s @@ -0,0 +1,220 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f16f16 < %s 2>&1 | FileCheck %s + +// FMOP4A + +// Single vectors + +fmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za2.h, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.h, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4a za0.h, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4a za0.h, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4a za0.h, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.h, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even 
register in z16.h..z30.h + +fmop4a za0.h, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.h, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +fmop4a za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za2.h, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.h, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4a za0.h, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4a za0.h, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4a za0.h, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.h, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.h, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4a za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za2.h, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.h, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4a za0.h, 
{z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.h, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.h, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.h, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.h, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +fmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za2.h, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.h, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.h, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.h, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.h, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.h, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid 
vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.h, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + + +// FMOP4S + +// Single vectors + +fmop4s za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za2.h, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.h, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4s za0.h, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4s za0.h, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4s za0.h, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.h, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.h, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.h, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +fmop4s za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za2.h, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.h, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid 
restricted vector register, expected even register in z0.h..z14.h + +fmop4s za0.h, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4s za0.h, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +fmop4s za0.h, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.h, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.h, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4s za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za2.h, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.h, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4s za0.h, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.h, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.h, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.h, {z0.h-z1.h}, z17.h +// 
CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.h, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +fmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za2.h, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.h, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.h, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.h, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.h, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.h, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.h, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s new file mode 100644 index 000000000000000..2a94acd35e95c30 --- /dev/null +++ 
b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s @@ -0,0 +1,179 @@ + +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f16f16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f16f16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f16f16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f16f16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f16f16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +// FMOP4A + +// Single vectors + +fmop4a za0.h, z0.h, z16.h // 10000001-00000000-00000000-00001000 +// CHECK-INST: fmop4a za0.h, z0.h, z16.h +// CHECK-ENCODING: [0x08,0x00,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81000008 + +fmop4a za1.h, z12.h, z24.h // 10000001-00001000-00000001-10001001 +// CHECK-INST: fmop4a za1.h, z12.h, z24.h +// CHECK-ENCODING: [0x89,0x01,0x08,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81080189 + +fmop4a za1.h, z14.h, z30.h // 10000001-00001110-00000001-11001001 +// CHECK-INST: fmop4a za1.h, z14.h, z30.h +// CHECK-ENCODING: [0xc9,0x01,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 810e01c9 + +// Single and multiple vectors + +fmop4a za0.h, z0.h, {z16.h-z17.h} // 10000001-00010000-00000000-00001000 +// CHECK-INST: fmop4a za0.h, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: 
[0x08,0x00,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81100008 + +fmop4a za1.h, z12.h, {z24.h-z25.h} // 10000001-00011000-00000001-10001001 +// CHECK-INST: fmop4a za1.h, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x89,0x01,0x18,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81180189 + +fmop4a za1.h, z14.h, {z30.h-z31.h} // 10000001-00011110-00000001-11001001 +// CHECK-INST: fmop4a za1.h, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x01,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 811e01c9 + +// Multiple and single vectors + +fmop4a za0.h, {z0.h-z1.h}, z16.h // 10000001-00000000-00000010-00001000 +// CHECK-INST: fmop4a za0.h, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x08,0x02,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81000208 + +fmop4a za1.h, {z12.h-z13.h}, z24.h // 10000001-00001000-00000011-10001001 +// CHECK-INST: fmop4a za1.h, { z12.h, z13.h }, z24.h +// CHECK-ENCODING: [0x89,0x03,0x08,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81080389 + +fmop4a za1.h, {z14.h-z15.h}, z30.h // 10000001-00001110-00000011-11001001 +// CHECK-INST: fmop4a za1.h, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xc9,0x03,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 810e03c9 + +// Multiple vectors + +fmop4a za0.h, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00010000-00000010-00001000 +// CHECK-INST: fmop4a za0.h, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x08,0x02,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81100208 + +fmop4a za1.h, {z12.h-z13.h}, {z24.h-z25.h} // 10000001-00011000-00000011-10001001 +// CHECK-INST: fmop4a za1.h, { z12.h, z13.h }, { z24.h, z25.h } +// CHECK-ENCODING: [0x89,0x03,0x18,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81180389 
+ +fmop4a za1.h, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00011110-00000011-11001001 +// CHECK-INST: fmop4a za1.h, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x03,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 811e03c9 + +// FMOP4S + +// Single vectors + +fmop4s za0.h, z0.h, z16.h // 10000001-00000000-00000000-00011000 +// CHECK-INST: fmop4s za0.h, z0.h, z16.h +// CHECK-ENCODING: [0x18,0x00,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81000018 + +fmop4s za1.h, z12.h, z24.h // 10000001-00001000-00000001-10011001 +// CHECK-INST: fmop4s za1.h, z12.h, z24.h +// CHECK-ENCODING: [0x99,0x01,0x08,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81080199 + +fmop4s za1.h, z14.h, z30.h // 10000001-00001110-00000001-11011001 +// CHECK-INST: fmop4s za1.h, z14.h, z30.h +// CHECK-ENCODING: [0xd9,0x01,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 810e01d9 + +// Single and multiple vectors + +fmop4s za0.h, z0.h, {z16.h-z17.h} // 10000001-00010000-00000000-00011000 +// CHECK-INST: fmop4s za0.h, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x18,0x00,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81100018 + +fmop4s za1.h, z12.h, {z24.h-z25.h} // 10000001-00011000-00000001-10011001 +// CHECK-INST: fmop4s za1.h, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x99,0x01,0x18,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81180199 + +fmop4s za1.h, z14.h, {z30.h-z31.h} // 10000001-00011110-00000001-11011001 +// CHECK-INST: fmop4s za1.h, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x01,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 811e01d9 + +// Multiple and single vectors + +fmop4s za0.h, {z0.h-z1.h}, z16.h // 10000001-00000000-00000010-00011000 +// CHECK-INST: fmop4s za0.h, { z0.h, z1.h }, z16.h +// 
CHECK-ENCODING: [0x18,0x02,0x00,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81000218 + +fmop4s za1.h, {z12.h-z13.h}, z24.h // 10000001-00001000-00000011-10011001 +// CHECK-INST: fmop4s za1.h, { z12.h, z13.h }, z24.h +// CHECK-ENCODING: [0x99,0x03,0x08,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81080399 + +fmop4s za1.h, {z14.h-z15.h}, z30.h // 10000001-00001110-00000011-11011001 +// CHECK-INST: fmop4s za1.h, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xd9,0x03,0x0e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 810e03d9 + +// Multiple vectors + +fmop4s za0.h, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00010000-00000010-00011000 +// CHECK-INST: fmop4s za0.h, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x18,0x02,0x10,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81100218 + +fmop4s za1.h, {z12.h-z13.h}, {z24.h-z25.h} // 10000001-00011000-00000011-10011001 +// CHECK-INST: fmop4s za1.h, { z12.h, z13.h }, { z24.h, z25.h } +// CHECK-ENCODING: [0x99,0x03,0x18,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 81180399 + +fmop4s za1.h, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00011110-00000011-11011001 +// CHECK-INST: fmop4s za1.h, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x03,0x1e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16 +// CHECK-UNKNOWN: 811e03d9 From e19a5fc6d306a81d181a9597a8b25c444c08d722 Mon Sep 17 00:00:00 2001 From: Hari Limaye Date: Tue, 29 Oct 2024 11:53:12 +0000 Subject: [PATCH 275/425] [FuncSpec] Improve accounting of specialization codesize growth (#113448) Only accumulate the codesize increase of functions that are actually specialized, rather than for every candidate specialization that we analyse. 
This fixes a subtle bug where prior analysis of candidate specializations that were deemed unprofitable could prevent subsequent profitable candidates from being recognised. --- .../Transforms/IPO/FunctionSpecialization.h | 11 +++-- .../Transforms/IPO/FunctionSpecialization.cpp | 41 +++++++++-------- .../FunctionSpecialization/maxgrowth.ll | 44 +++++++++++++++++++ 3 files changed, 73 insertions(+), 23 deletions(-) create mode 100644 llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h index f20b01c186306d9..e82155a6c72974f 100644 --- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h +++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h @@ -138,13 +138,16 @@ struct Spec { // Profitability of the specialization. unsigned Score; + // Number of instructions in the specialization. + unsigned CodeSize; + // List of call sites, matching this specialization. SmallVector CallSites; - Spec(Function *F, const SpecSig &S, unsigned Score) - : F(F), Sig(S), Score(Score) {} - Spec(Function *F, const SpecSig &&S, unsigned Score) - : F(F), Sig(S), Score(Score) {} + Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize) + : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} + Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize) + : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {} }; class InstCostVisitor : public InstVisitor { diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index 8e6993d35d49917..919d3143a13f7e7 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -643,6 +643,18 @@ FunctionSpecializer::~FunctionSpecializer() { cleanUpSSA(); } +/// Get the unsigned Value of given Cost object. 
Assumes the Cost is always +/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and +/// always Valid. +static unsigned getCostValue(const Cost &C) { + int64_t Value = *C.getValue(); + + assert(Value >= 0 && "CodeSize and Latency cannot be negative"); + // It is safe to down cast since we know the arguments cannot be negative and + // Cost is of type int64_t. + return static_cast(Value); +} + /// Attempt to specialize functions in the module to enable constant /// propagation across function boundaries. /// @@ -757,6 +769,11 @@ bool FunctionSpecializer::run() { SmallVector Clones; for (unsigned I = 0; I < NSpecs; ++I) { Spec &S = AllSpecs[BestSpecs[I]]; + + // Accumulate the codesize growth for the function, now we are creating the + // specialization. + FunctionGrowth[S.F] += S.CodeSize; + S.Clone = createSpecialization(S.F, S.Sig); // Update the known call sites to call the clone. @@ -835,18 +852,6 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) { return Clone; } -/// Get the unsigned Value of given Cost object. Assumes the Cost is always -/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and -/// always Valid. -static unsigned getCostValue(const Cost &C) { - int64_t Value = *C.getValue(); - - assert(Value >= 0 && "CodeSize and Latency cannot be negative"); - // It is safe to down cast since we know the arguments cannot be negative and - // Cost is of type int64_t. - return static_cast(Value); -} - bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, SmallVectorImpl &AllSpecs, SpecMap &SM) { @@ -922,16 +927,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, } CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs(); + unsigned CodeSizeSavings = getCostValue(CodeSize); + unsigned SpecSize = FuncSize - CodeSizeSavings; + auto IsProfitable = [&]() -> bool { // No check required. 
if (ForceSpecialization) return true; - unsigned CodeSizeSavings = getCostValue(CodeSize); - // TODO: We should only accumulate codesize increase of specializations - // that are actually created. - FunctionGrowth[F] += FuncSize - CodeSizeSavings; - LLVM_DEBUG( dbgs() << "FnSpecialization: Specialization bonus {Inlining = " << Score << " (" << (Score * 100 / FuncSize) << "%)}\n"); @@ -962,7 +965,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, if (LatencySavings < MinLatencySavings * FuncSize / 100) return false; // Maximum codesize growth. - if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth) + if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth) return false; Score += std::max(CodeSizeSavings, LatencySavings); @@ -974,7 +977,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize, continue; // Create a new specialisation entry. - auto &Spec = AllSpecs.emplace_back(F, S, Score); + auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize); if (CS.getFunction() != F) Spec.CallSites.push_back(&CS); const unsigned Index = AllSpecs.size() - 1; diff --git a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll new file mode 100644 index 000000000000000..82d1f7ae4a6e160 --- /dev/null +++ b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; RUN: opt -passes="ipsccp" -funcspec-min-function-size=1 \ +; RUN: -funcspec-for-literal-constant=true \ +; RUN: -funcspec-min-codesize-savings=50 \ +; RUN: -funcspec-min-latency-savings=50 \ +; RUN: -funcspec-max-codesize-growth=1 \ +; RUN: -S < %s | FileCheck %s + +; Verify that we are able to specialize a function successfully after analysis +; of other specializations that are found to not be profitable. 
+define void @test_specialize_after_failed_analysis(i32 %n) { +entry: + %notspec0 = call i32 @add(i32 0, i32 %n) + %notspec1 = call i32 @add(i32 1, i32 %n) + %spec = call i32 @add(i32 1, i32 1) + ret void +} + +define i32 @add(i32 %x, i32 %y) { +entry: + %res = add i32 %x, %y + ret i32 %res +} +; CHECK-LABEL: define void @test_specialize_after_failed_analysis( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]]) +; CHECK-NEXT: [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]]) +; CHECK-NEXT: [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define i32 @add( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[RES:%.*]] = add i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[RES]] +; +; +; CHECK-LABEL: define internal i32 @add.specialized.1( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: ret i32 poison +; From c0cba25cdd06d700bdc15e9ae48c1fcadd0963bd Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Tue, 29 Oct 2024 21:13:54 +0900 Subject: [PATCH 276/425] [mlir][Transforms] Dialect conversion: Hardening `replaceOp` (#109540) This commit adds extra checks/assertions to the `ConversionPatternRewriterImpl::notifyOpReplaced` to improve its robustness. 1. Replacing an `unrealized_conversion_cast` op that was created by the driver is now forbidden and caught early during `replaceOp`. It may work in some cases, but it is generally dangerous because the conversion driver keeps track of these ops and performs some extra legalization steps during the "finalize" phase. (Erasing them is fine.) 2. `null` replacement values are no longer registered in the `ConversionValueMapping`. This was an oversight in #106760. There is no benefit in having `null` values in the `ConversionValueMapping`. (It may even cause problems.) 
This change is in preparation of merging the 1:1 and 1:N dialect conversion drivers. --- .../Transforms/Utils/DialectConversion.cpp | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 9f8a482d6e2d223..44cf8331d55a733 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -1382,16 +1382,21 @@ void ConversionPatternRewriterImpl::notifyOpReplaced(Operation *op, assert(newValues.size() == op->getNumResults()); assert(!ignoredOps.contains(op) && "operation was already replaced"); + // Check if replaced op is an unresolved materialization, i.e., an + // unrealized_conversion_cast op that was created by the conversion driver. + bool isUnresolvedMaterialization = false; + if (auto castOp = dyn_cast(op)) + if (unresolvedMaterializations.contains(castOp)) + isUnresolvedMaterialization = true; + // Create mappings for each of the new result values. for (auto [newValue, result] : llvm::zip(newValues, op->getResults())) { if (!newValue) { // This result was dropped and no replacement value was provided. - if (auto castOp = dyn_cast(op)) { - if (unresolvedMaterializations.contains(castOp)) { - // Do not create another materializations if we are erasing a - // materialization. - continue; - } + if (isUnresolvedMaterialization) { + // Do not create another materializations if we are erasing a + // materialization. + continue; } // Materialize a replacement value "out of thin air". @@ -1400,10 +1405,20 @@ void ConversionPatternRewriterImpl::notifyOpReplaced(Operation *op, result.getLoc(), /*inputs=*/ValueRange(), /*outputType=*/result.getType(), /*originalType=*/Type(), currentTypeConverter); + } else { + // Make sure that the user does not mess with unresolved materializations + // that were inserted by the conversion driver. 
We keep track of these + // ops in internal data structures. Erasing them must be allowed because + // this can happen when the user is erasing an entire block (including + // its body). But replacing them with another value should be forbidden + // to avoid problems with the `mapping`. + assert(!isUnresolvedMaterialization && + "attempting to replace an unresolved materialization"); } - // Remap, and check for any result type changes. - mapping.map(result, newValue); + // Remap result to replacement value. + if (newValue) + mapping.map(result, newValue); } appendRewrite(op, currentTypeConverter); From 8239ea3918828ab9c5ea8be1f4100d464f0bf3c0 Mon Sep 17 00:00:00 2001 From: Abid Qadeer Date: Tue, 29 Oct 2024 12:22:43 +0000 Subject: [PATCH 277/425] [flang][debug] Support IndexType. (#113921) --- flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp | 4 ++++ flang/test/Transforms/debug-index-type.fir | 10 ++++++++++ 2 files changed, 14 insertions(+) create mode 100644 flang/test/Transforms/debug-index-type.fir diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 1ab6c76dae8eda7..8e516734a908790 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -581,6 +581,10 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, /*genAssociated=*/false); } else if (auto vecTy = mlir::dyn_cast_or_null(Ty)) { return convertVectorType(vecTy, fileAttr, scope, declOp); + } else if (mlir::isa(Ty)) { + return genBasicType(context, mlir::StringAttr::get(context, "integer"), + llvmTypeConverter.getIndexTypeBitwidth(), + llvm::dwarf::DW_ATE_signed); } else if (auto boxTy = mlir::dyn_cast_or_null(Ty)) { auto elTy = boxTy.getElementType(); if (auto seqTy = mlir::dyn_cast_or_null(elTy)) diff --git a/flang/test/Transforms/debug-index-type.fir b/flang/test/Transforms/debug-index-type.fir new file mode 100644 index 
000000000000000..20bd8471d7cf648 --- /dev/null +++ b/flang/test/Transforms/debug-index-type.fir @@ -0,0 +1,10 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func private @str(%arg0: index) -> i32 loc(#loc1) +} +#loc1 = loc("test.f90":5:1) + +// CHECK: #[[INT32_TY:.*]] = #llvm.di_basic_type +// CHECK: #[[INT64_TY:.*]] = #llvm.di_basic_type +// CHECK: #llvm.di_subroutine_type<{{.*}}types = #[[INT32_TY]], #[[INT64_TY]]> From d48c849ea94efb56d484393816e147afcec28d65 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 29 Oct 2024 08:00:44 -0500 Subject: [PATCH 278/425] [flang][OpenMP] Parsing support for iterator in DEPEND clause (#113622) Warn about use of iterators in OpenMP versions that didn't have them (support added in 5.0). Emit a TODO error in lowering. --- flang/include/flang/Parser/parse-tree.h | 4 +- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 64 +++++++++++-------- flang/lib/Lower/OpenMP/Clauses.cpp | 15 +++-- flang/lib/Parser/openmp-parsers.cpp | 3 +- flang/lib/Semantics/check-omp-structure.cpp | 9 +++ .../test/Lower/OpenMP/Todo/depend-clause.f90 | 10 +++ flang/test/Semantics/OpenMP/depend05.f90 | 9 +++ 7 files changed, 79 insertions(+), 35 deletions(-) create mode 100644 flang/test/Lower/OpenMP/Todo/depend-clause.f90 create mode 100644 flang/test/Semantics/OpenMP/depend05.f90 diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 506a470c5557b74..174f4c631e9d4cd 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3566,7 +3566,9 @@ struct OmpDependClause { WRAPPER_CLASS(Sink, std::list); struct InOut { TUPLE_CLASS_BOILERPLATE(InOut); - std::tuple t; + std::tuple, OmpTaskDependenceType, + OmpObjectList> + t; }; std::variant u; }; diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 
8fb0dd4a1ec3a70..7c254ce673855ac 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -795,35 +795,43 @@ bool ClauseProcessor::processCopyprivate( bool ClauseProcessor::processDepend(mlir::omp::DependClauseOps &result) const { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - return findRepeatableClause( - [&](const omp::clause::Depend &clause, const parser::CharBlock &) { - using Depend = omp::clause::Depend; - assert(std::holds_alternative(clause.u) && - "Only the form with dependence type is handled at the moment"); - auto &depType = std::get(clause.u); - auto kind = std::get(depType.t); - auto &objects = std::get(depType.t); - - mlir::omp::ClauseTaskDependAttr dependTypeOperand = - genDependKindAttr(firOpBuilder, kind); - result.dependKinds.append(objects.size(), dependTypeOperand); - - for (const omp::Object &object : objects) { - assert(object.ref() && "Expecting designator"); - - if (evaluate::ExtractSubstring(*object.ref())) { - TODO(converter.getCurrentLocation(), - "substring not supported for task depend"); - } else if (evaluate::IsArrayElement(*object.ref())) { - TODO(converter.getCurrentLocation(), - "array sections not supported for task depend"); - } + auto process = [&](const omp::clause::Depend &clause, + const parser::CharBlock &) { + using Depend = omp::clause::Depend; + if (!std::holds_alternative(clause.u)) { + TODO(converter.getCurrentLocation(), + "DEPEND clause with SINK or SOURCE is not supported yet"); + } + auto &depType = std::get(clause.u); + auto kind = std::get(depType.t); + auto &objects = std::get(depType.t); - semantics::Symbol *sym = object.sym(); - const mlir::Value variable = converter.getSymbolAddress(*sym); - result.dependVars.push_back(variable); - } - }); + if (std::get>(depType.t)) { + TODO(converter.getCurrentLocation(), + "Support for iterator modifiers is not implemented yet"); + } + mlir::omp::ClauseTaskDependAttr dependTypeOperand = + 
genDependKindAttr(firOpBuilder, kind); + result.dependKinds.append(objects.size(), dependTypeOperand); + + for (const omp::Object &object : objects) { + assert(object.ref() && "Expecting designator"); + + if (evaluate::ExtractSubstring(*object.ref())) { + TODO(converter.getCurrentLocation(), + "substring not supported for task depend"); + } else if (evaluate::IsArrayElement(*object.ref())) { + TODO(converter.getCurrentLocation(), + "array sections not supported for task depend"); + } + + semantics::Symbol *sym = object.sym(); + const mlir::Value variable = converter.getSymbolAddress(*sym); + result.dependVars.push_back(variable); + } + }; + + return findRepeatableClause(process); } bool ClauseProcessor::processHasDeviceAddr( diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index b1fa52751fbd7b0..9483f643acd55a6 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -595,11 +595,16 @@ Depend make(const parser::OmpClause::Depend &inp, }, // Depend::DepType [&](const wrapped::InOut &s) -> Variant { - auto &t0 = std::get(s.t); - auto &t1 = std::get(s.t); - return Depend::DepType{{/*TaskDependenceType=*/convert1(t0.v), - /*Iterator=*/std::nullopt, - /*LocatorList=*/makeObjects(t1, semaCtx)}}; + auto &t0 = + std::get>(s.t); + auto &t1 = std::get(s.t); + auto &t2 = std::get(s.t); + + auto &&maybeIter = maybeApply( + [&](auto &&s) { return makeIterator(s, semaCtx); }, t0); + return Depend::DepType{{/*TaskDependenceType=*/convert1(t1.v), + /*Iterator=*/std::move(maybeIter), + /*LocatorList=*/makeObjects(t2, semaCtx)}}; }, }, inp.v.u)}; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 3ca4e93a6c9b933..598439cbee87e64 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -376,7 +376,8 @@ TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US, construct( construct("SOURCE"_tok)) || construct(construct( - Parser{}, ":" >> 
Parser{}))) + maybe(Parser{} / ","_tok), + Parser{} / ":", Parser{}))) // 2.15.3.7 LINEAR (linear-list: linear-step) // linear-list -> list | modifier(list) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 0c3a0e76df6a6af..8f3eb9fefee6784 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3312,6 +3312,15 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Depend &x) { } } } + if (std::get>(inOut->t)) { + unsigned version{context_.langOptions().OpenMPVersion}; + unsigned allowedInVersion{50}; + if (version < allowedInVersion) { + context_.Say(GetContext().clauseSource, + "Iterator modifiers are not supported in %s, %s"_warn_en_US, + ThisVersion(version), TryVersion(allowedInVersion)); + } + } } } diff --git a/flang/test/Lower/OpenMP/Todo/depend-clause.f90 b/flang/test/Lower/OpenMP/Todo/depend-clause.f90 new file mode 100644 index 000000000000000..74525888c91d6da --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/depend-clause.f90 @@ -0,0 +1,10 @@ +!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s + +!CHECK: Support for iterator modifiers is not implemented yet +subroutine f00(x) + integer :: x(10) + !$omp task depend(iterator(i = 1:10), in: x(i)) + x = 0 + !$omp end task +end diff --git a/flang/test/Semantics/OpenMP/depend05.f90 b/flang/test/Semantics/OpenMP/depend05.f90 new file mode 100644 index 000000000000000..53fd82bd08a9eb2 --- /dev/null +++ b/flang/test/Semantics/OpenMP/depend05.f90 @@ -0,0 +1,9 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=45 -Werror + +subroutine f00(x) + integer :: x(10) +!WARNING: Iterator modifiers are not supported in OpenMP v4.5, try -fopenmp-version=50 + !$omp task depend(iterator(i = 1:10), in: x(i)) + x = 0 + !$omp end task +end From 
3c2d77185e315d4558368ccab92e7a86c74a9a83 Mon Sep 17 00:00:00 2001 From: Lukacma Date: Tue, 29 Oct 2024 13:02:46 +0000 Subject: [PATCH 279/425] [AARCH64] Add assembly/disassembly for FMMLA instructions (#113313) This patch adds assembly/disassembly for the following instructions: FMMLA (widening, FP16 to FP32) FMMLA (widening, FP8 to FP16) FMMLA (widening, FP8 to FP32) According to [1] [1]https://developer.arm.com/documentation/ddi0602 --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 18 +++++- llvm/lib/Target/AArch64/SVEInstrFormats.td | 61 +++++++++---------- .../SVE/matrix-multiply-fp-diagnostics.s | 5 -- .../MC/AArch64/SVE2/directive-arch-negative.s | 20 +++++- llvm/test/MC/AArch64/SVE2/directive-arch.s | 14 ++++- .../SVE2/directive-arch_extension-negative.s | 20 +++++- .../AArch64/SVE2/directive-arch_extension.s | 14 ++++- .../MC/AArch64/SVE2/directive-cpu-negative.s | 20 +++++- llvm/test/MC/AArch64/SVE2/directive-cpu.s | 14 ++++- .../AArch64/SVE2/fmmla-f16f32mm-diagnostics.s | 18 ++++++ llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s | 41 +++++++++++++ .../AArch64/SVE2/fmmla-f8f16mm-diagnostics.s | 24 ++++++++ llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s | 39 ++++++++++++ .../AArch64/SVE2/fmmla-f8f32mm-diagnostics.s | 30 +++++++++ llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s | 39 ++++++++++++ 15 files changed, 331 insertions(+), 46 deletions(-) create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 2b69903b133fe3b..4f101d0d46b7afa 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ 
b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2427,7 +2427,7 @@ let Predicates = [HasBF16, HasSVEorSME] in { } // End HasBF16, HasSVEorSME let Predicates = [HasBF16, HasSVE] in { - defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>; + defm BFMMLA_ZZZ : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>; } // End HasBF16, HasSVE let Predicates = [HasBF16, HasSVEorSME] in { @@ -3449,11 +3449,15 @@ let Predicates = [HasSVEorSME, HasMatMulInt8] in { } // End HasSVEorSME, HasMatMulInt8 let Predicates = [HasSVE, HasMatMulFP32] in { - defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>; + defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>; } // End HasSVE, HasMatMulFP32 +let Predicates = [HasSVE_F16F32MM] in { + def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>; +} // End HasSVE_F16F32MM + let Predicates = [HasSVE, HasMatMulFP64] in { - defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>; + defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>; defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8, nxv16i8, nxv16i1, AArch64ld1ro_z>; defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1, AArch64ld1ro_z>; defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1, AArch64ld1ro_z>; @@ -4342,6 +4346,14 @@ def FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb">; def FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt">; } // End HasSSVE_FP8FMA +let Predicates = [HasSVE2, HasF8F32MM] in { + def FMMLA_ZZZ_BtoS : sve2_fp8_mmla<0b0, ZPR32, "fmmla">; +} + +let Predicates = [HasSVE2, HasF8F16MM] in { + def FMMLA_ZZZ_BtoH : sve2_fp8_mmla<0b1, ZPR16, "fmmla">; +} + let Predicates = [HasSSVE_FP8DOT2] in { // FP8 Widening Dot-Product - Indexed Group defm FDOT_ZZZI_BtoH : 
sve2_fp8_dot_indexed_h<"fdot">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 31312e00b919e23..d1ceb30f36dcdc9 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -9042,30 +9042,6 @@ multiclass sve_float_dot_indexed opc, ZPRRegOp src1_ty, def : SVE_4_Op_Imm_Pat(NAME)>; } -class sve_bfloat_matmul -: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm), - asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { - bits<5> Zm; - bits<5> Zda; - bits<5> Zn; - let Inst{31-21} = 0b01100100011; - let Inst{20-16} = Zm; - let Inst{15-10} = 0b111001; - let Inst{9-5} = Zn; - let Inst{4-0} = Zda; - - let Constraints = "$Zda = $_Zda"; - let DestructiveInstType = DestructiveOther; - let ElementSize = ElementSizeH; - let hasSideEffects = 0; - let mayRaiseFPException = 1; -} - -multiclass sve_bfloat_matmul { - def NAME : sve_bfloat_matmul; - def : SVE_3_Op_Pat(NAME)>; -} - class sve_bfloat_convert : I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn), asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> { @@ -9188,14 +9164,14 @@ multiclass sve_int_dot_mixed_indexed { // SVE Floating Point Matrix Multiply Accumulate Group //===----------------------------------------------------------------------===// -class sve_fp_matrix_mla -: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty:$Zm), +class sve_fp_matrix_mla opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty> +: I<(outs zda_ty:$Zda), (ins zda_ty:$_Zda, reg_ty:$Zn, reg_ty:$Zm), asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zda; bits<5> Zn; bits<5> Zm; - let Inst{31-23} = 0b011001001; - let Inst{22} = sz; + let Inst{31-24} = 0b01100100; + let Inst{23-22} = opc; let Inst{21} = 1; let Inst{20-16} = Zm; let Inst{15-10} = 0b111001; @@ -9204,15 +9180,14 @@ class sve_fp_matrix_mla let Constraints = "$Zda = $_Zda"; let DestructiveInstType = DestructiveOther; - let ElementSize = zprty.ElementSize; let hasSideEffects 
= 0; let mayRaiseFPException = 1; } -multiclass sve_fp_matrix_mla { - def NAME : sve_fp_matrix_mla; +multiclass sve_fp_matrix_mla opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty, SDPatternOperator op, ValueType zda_vt, ValueType reg_vt> { + def NAME : sve_fp_matrix_mla; - def : SVE_3_Op_Pat(NAME)>; + def : SVE_3_Op_Pat(NAME)>; } //===----------------------------------------------------------------------===// @@ -10632,6 +10607,28 @@ class sve2_fp8_mla_long_long_by_indexed_elem TT, string mnemonic> let Uses = [FPMR, FPCR]; } +// FP8 Matrix Multiply-accumulate Group +class sve2_fp8_mmla + : I<(outs dst_ty:$Zda), + (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR8:$Zm), + mnemonic, "\t$Zda, $Zn, $Zm", + "", []>, Sched<[]>{ + bits<5> Zda; + bits<5> Zn; + bits<5> Zm; + let Inst{31-23} = 0b011001000; + let Inst{22} = opc; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-10} = 0b111000; + let Inst{9-5} = Zn; + let Inst{4-0} = Zda; + let Constraints = "$Zda = $_Zda"; + let DestructiveInstType = DestructiveOther; + let ElementSize = dst_ty.ElementSize; + let Uses = [FPMR, FPCR]; +} + class sve_fp8_dot_indexed opc, ZPRRegOp dst_ty, Operand iop_ty, string mnemonic> : I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, iop_ty:$iop), mnemonic, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> { diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s index 8ae4d4992844239..2fe43f7aa8444cc 100644 --- a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s @@ -3,11 +3,6 @@ // --------------------------------------------------------------------------// // FMMLA (SVE) -// Invalid element size - -fmmla z0.h, z1.h, z2.h -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width - // Mis-matched element size fmmla z0.d, z1.s, z2.s diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s 
b/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s index 858aaf9d13ecc4e..966bead071fe395 100644 --- a/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s +++ b/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s @@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s // CHECK: error: instruction requires: sve2-bitperm // CHECK-NEXT: bgrp z21.s, z10.s, z21.s +.arch armv9-a+f8f16mm +.arch armv9-a+nof8f16mm +fmmla z23.h, z13.b, z8.b +// CHECK: error: instruction requires: f8f16mm +// CHECK-NEXT: fmmla z23.h, z13.b, z8.b + +.arch armv9-a+f8f32mm +.arch armv9-a+nof8f32mm +fmmla z23.s, z13.b, z8.b +// CHECK: error: instruction requires: f8f32mm +// CHECK-NEXT: fmmla z23.s, z13.b, z8.b + +.arch armv9-a+sve-f16f32mm +.arch armv9-a+nosve-f16f32mm +fmmla z23.s, z13.h, z8.h +// CHECK: error: instruction requires: sve-f16f32mm +// CHECK-NEXT: fmmla z23.s, z13.h, z8.h + .arch armv9-a+sve-bfscale .arch armv9-a+nosve-bfscale bfscale z0.h, p0/m, z0.h, z0.h // CHECK: error: instruction requires: sve-bfscale -// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h \ No newline at end of file +// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch.s b/llvm/test/MC/AArch64/SVE2/directive-arch.s index b9710b67f8a1d0a..99f6198a60abbcd 100644 --- a/llvm/test/MC/AArch64/SVE2/directive-arch.s +++ b/llvm/test/MC/AArch64/SVE2/directive-arch.s @@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d bgrp z21.s, z10.s, z21.s // CHECK: bgrp z21.s, z10.s, z21.s +.arch armv9-a+f8f16mm +fmmla z23.h, z13.b, z8.b +// CHECK: fmmla z23.h, z13.b, z8.b + +.arch armv9-a+f8f32mm +fmmla z23.s, z13.b, z8.b +// CHECK: fmmla z23.s, z13.b, z8.b + +.arch armv9-a+sve-f16f32mm +fmmla z23.s, z13.h, z8.h +// CHECK: fmmla z23.s, z13.h, z8.h + .arch armv9-a+sve-bfscale bfscale z0.h, p0/m, z0.h, z0.h -// CHECK: bfscale z0.h, p0/m, z0.h, z0.h \ No newline at end of file +// CHECK: bfscale z0.h, p0/m, z0.h, z0.h diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s 
b/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s index bd625d252626921..e967f5aa60bd738 100644 --- a/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s +++ b/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s @@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s // CHECK: error: instruction requires: sve2-bitperm // CHECK-NEXT: bgrp z21.s, z10.s, z21.s +.arch_extension f8f16mm +.arch_extension nof8f16mm +fmmla z23.h, z13.b, z8.b +// CHECK: error: instruction requires: f8f16mm +// CHECK-NEXT: fmmla z23.h, z13.b, z8.b + +.arch_extension f8f32mm +.arch_extension nof8f32mm +fmmla z23.s, z13.b, z8.b +// CHECK: error: instruction requires: f8f32mm +// CHECK-NEXT: fmmla z23.s, z13.b, z8.b + +.arch_extension sve-f16f32mm +.arch_extension nosve-f16f32mm +fmmla z23.s, z13.h, z8.h +// CHECK: error: instruction requires: sve-f16f32mm +// CHECK-NEXT: fmmla z23.s, z13.h, z8.h + .arch_extension sve-bfscale .arch_extension nosve-bfscale bfscale z0.h, p0/m, z0.h, z0.h // CHECK: error: instruction requires: sve-bfscale -// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h \ No newline at end of file +// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s b/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s index 28d803bf7cc88c2..2fdbb525464d90f 100644 --- a/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s +++ b/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s @@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d bgrp z21.s, z10.s, z21.s // CHECK: bgrp z21.s, z10.s, z21.s +.arch_extension f8f16mm +fmmla z23.h, z13.b, z8.b +// CHECK: fmmla z23.h, z13.b, z8.b + +.arch_extension f8f32mm +fmmla z23.s, z13.b, z8.b +// CHECK: fmmla z23.s, z13.b, z8.b + +.arch_extension sve-f16f32mm +fmmla z23.s, z13.h, z8.h +// CHECK: fmmla z23.s, z13.h, z8.h + .arch_extension sve-bfscale bfscale z0.h, p0/m, z0.h, z0.h -// CHECK: bfscale z0.h, p0/m, z0.h, z0.h \ No newline at end of file +// CHECK: bfscale z0.h, p0/m, z0.h, z0.h diff 
--git a/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s b/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s index 45a04a58eac3b49..9a8af638b703780 100644 --- a/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s +++ b/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s @@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s // CHECK: error: instruction requires: sve2-bitperm // CHECK-NEXT: bgrp z21.s, z10.s, z21.s +.cpu generic+sve2+f8f16mm +.cpu generic+sve2+nof8f16mm +fmmla z23.h, z13.b, z8.b +// CHECK: error: instruction requires: f8f16mm +// CHECK-NEXT: fmmla z23.h, z13.b, z8.b + +.cpu generic+sve2+f8f32mm +.cpu generic+sve2+nof8f32mm +fmmla z23.s, z13.b, z8.b +// CHECK: error: instruction requires: f8f32mm +// CHECK-NEXT: fmmla z23.s, z13.b, z8.b + +.cpu generic+sve-f16f32mm +.cpu generic+nosve-f16f32mm +fmmla z23.s, z13.h, z8.h +// CHECK: error: instruction requires: sve-f16f32mm +// CHECK-NEXT: fmmla z23.s, z13.h, z8.h + .cpu generic+sve-bfscale .cpu generic+nosve-bfscale bfscale z0.h, p0/m, z0.h, z0.h // CHECK: error: instruction requires: sve-bfscale -// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h \ No newline at end of file +// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h diff --git a/llvm/test/MC/AArch64/SVE2/directive-cpu.s b/llvm/test/MC/AArch64/SVE2/directive-cpu.s index 75d2321bf620779..daa5ec510b226ae 100644 --- a/llvm/test/MC/AArch64/SVE2/directive-cpu.s +++ b/llvm/test/MC/AArch64/SVE2/directive-cpu.s @@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d bgrp z21.s, z10.s, z21.s // CHECK: bgrp z21.s, z10.s, z21.s +.cpu generic+sve2+f8f16mm +fmmla z23.h, z13.b, z8.b +// CHECK: fmmla z23.h, z13.b, z8.b + +.cpu generic+sve2+f8f32mm +fmmla z23.s, z13.b, z8.b +// CHECK: fmmla z23.s, z13.b, z8.b + +.cpu generic+sve-f16f32mm +fmmla z23.s, z13.h, z8.h +// CHECK: fmmla z23.s, z13.h, z8.h + .cpu generic+sve-bfscale bfscale z0.h, p0/m, z0.h, z0.h -// CHECK: bfscale z0.h, p0/m, z0.h, z0.h \ No newline at end of file +// CHECK: bfscale z0.h, p0/m, z0.h, z0.h diff --git 
a/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s new file mode 100644 index 000000000000000..924c123f0ca5bc9 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s @@ -0,0 +1,18 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-f16f32mm 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// FMMLA (SVE) + +// Invalid element size + +fmmla z0.s, z1.b, z2.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f8f32mm +fmmla z0.d, z1.h, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width + +// Mis-matched element size + +fmmla z0.s, z1.h, z2.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +fmmla z0.s, z1.d, z2.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s new file mode 100644 index 000000000000000..84efcfe8b12e48b --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s @@ -0,0 +1,41 @@ + +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+sve-f16f32mm < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+sve-f16f32mm < %s \ +// RUN: | llvm-objdump -d --mattr=+sve,+sve-f16f32mm - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+sve-f16f32mm < %s \ +// RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+sve-f16f32mm < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve,+sve-f16f32mm -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movprfx z23, z31 +fmmla z23.s, z13.h, z8.h // 01100100-00101000-11100101-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: fmmla z23.s, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0xe5,0x28,0x64] +// CHECK-ERROR: instruction requires: sve-f16f32mm +// CHECK-UNKNOWN: 6428e5b7 + +fmmla z0.s, z0.h, z0.h // 01100100-00100000-11100100-00000000 +// CHECK-INST: fmmla z0.s, z0.h, z0.h +// CHECK-ENCODING: [0x00,0xe4,0x20,0x64] +// CHECK-ERROR: instruction requires: sve-f16f32mm +// CHECK-UNKNOWN: 6420e400 + +fmmla z23.s, z13.h, z8.h // 01100100-00101000-11100101-10110111 +// CHECK-INST: fmmla z23.s, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0xe5,0x28,0x64] +// CHECK-ERROR: instruction requires: sve-f16f32mm +// CHECK-UNKNOWN: 6428e5b7 + +fmmla z31.s, z31.h, z31.h // 01100100-00111111-11100111-11111111 +// CHECK-INST: fmmla z31.s, z31.h, z31.h +// CHECK-ENCODING: [0xff,0xe7,0x3f,0x64] +// CHECK-ERROR: instruction requires: sve-f16f32mm +// CHECK-UNKNOWN: 643fe7ff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s new file mode 100644 index 000000000000000..59818d2d24a481c --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s @@ -0,0 +1,24 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid element width + +fmmla z21.b, z10.b, z21.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fmmla z21.b, z10.b, z21.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla z21.d, z10.b, z21.b +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fmmla z21.d, z10.b, z21.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla z21.s, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sve-f16f32mm +// CHECK-NEXT: fmmla z21.s, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla z21.s, z10.s, z21.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f32mm +// CHECK-NEXT: fmmla z21.s, z10.s, z21.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s new file mode 100644 index 000000000000000..ff343548993cfe5 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f16mm < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2,+f8f16mm - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f16mm < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2,+f8f16mm -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +movprfx z23, z31 +fmmla z23.h, z13.b, z8.b // 01100100-01101000-11100001-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: fmmla z23.h, z13.b, z8.b +// CHECK-ENCODING: [0xb7,0xe1,0x68,0x64] +// CHECK-ERROR: instruction requires: f8f16mm sve2 +// CHECK-UNKNOWN: 6468e1b7 + +fmmla z0.h, z0.b, z0.b // 01100100-01100000-11100000-00000000 +// CHECK-INST: fmmla z0.h, z0.b, z0.b +// CHECK-ENCODING: [0x00,0xe0,0x60,0x64] +// CHECK-ERROR: instruction requires: f8f16mm sve2 +// CHECK-UNKNOWN: 6460e000 + +fmmla z21.h, z10.b, z21.b // 01100100-01110101-11100001-01010101 +// CHECK-INST: fmmla z21.h, z10.b, z21.b +// CHECK-ENCODING: [0x55,0xe1,0x75,0x64] +// CHECK-ERROR: instruction requires: f8f16mm sve2 +// CHECK-UNKNOWN: 6475e155 + +fmmla z31.h, z31.b, z31.b // 01100100-01111111-11100011-11111111 +// CHECK-INST: fmmla z31.h, z31.b, z31.b +// CHECK-ENCODING: [0xff,0xe3,0x7f,0x64] +// CHECK-ERROR: instruction requires: f8f16mm sve2 +// CHECK-UNKNOWN: 647fe3ff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s new file mode 100644 index 000000000000000..0b1eb1b24e26438 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm 2>&1 < %s| FileCheck %s + + +// ------------------------------------------------------------------------- // +// Invalid element width + +fmmla z21.b, z10.b, z21.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fmmla z21.b, z10.b, z21.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla z21.h, z10.b, z21.b +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f8f16mm +// CHECK-NEXT: fmmla z21.h, z10.b, z21.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla z21.d, z10.b, z21.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fmmla z21.d, z10.b, z21.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla z21.s, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sve-f16f32mm +// CHECK-NEXT: fmmla z21.s, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmmla z21.s, z10.s, z21.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f32mm +// CHECK-NEXT: fmmla z21.s, z10.s, z21.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s new file mode 100644 index 000000000000000..8b59a112dc61b4c --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f32mm < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2,+f8f32mm - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f32mm < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2,+f8f32mm -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +movprfx z23, z31 +fmmla z23.s, z13.b, z8.b // 01100100-00101000-11100001-10110111 +// CHECK-INST: movprfx z23, z31 +// CHECK-INST: fmmla z23.s, z13.b, z8.b +// CHECK-ENCODING: [0xb7,0xe1,0x28,0x64] +// CHECK-ERROR: instruction requires: f8f32mm sve2 +// CHECK-UNKNOWN: 6428e1b7 + +fmmla z0.s, z0.b, z0.b // 01100100-00100000-11100000-00000000 +// CHECK-INST: fmmla z0.s, z0.b, z0.b +// CHECK-ENCODING: [0x00,0xe0,0x20,0x64] +// CHECK-ERROR: instruction requires: f8f32mm sve2 +// CHECK-UNKNOWN: 6420e000 + +fmmla z21.s, z10.b, z21.b // 01100100-00110101-11100001-01010101 +// CHECK-INST: fmmla z21.s, z10.b, z21.b +// CHECK-ENCODING: [0x55,0xe1,0x35,0x64] +// CHECK-ERROR: instruction requires: f8f32mm sve2 +// CHECK-UNKNOWN: 6435e155 + +fmmla z31.s, z31.b, z31.b // 01100100-00111111-11100011-11111111 +// CHECK-INST: fmmla z31.s, z31.b, z31.b +// CHECK-ENCODING: [0xff,0xe3,0x3f,0x64] +// CHECK-ERROR: instruction requires: f8f32mm sve2 +// CHECK-UNKNOWN: 643fe3ff From f7adacf57901ca65977f2af3502f434747cdd183 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 29 Oct 2024 09:03:35 -0400 Subject: [PATCH 280/425] Nominate Vlad Serebrennikov for C++ DRs (#114040) Vlad has been improving our C++ DR conformance testing story for many months at this point and writing these kinds of test is sometimes non-trivial, so having a maintainer specific for this is helpful. 
--- clang/Maintainers.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index ee5334b02f7000a..6635727e9eb65c6 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -226,6 +226,12 @@ C++ conformance | hubert.reinterpretcast\@gmail.com (email), hubert.reinterpretcast (Phabricator), hubert-reinterpretcast (GitHub) +C++ Defect Reports +~~~~~~~~~~~~~~~~~~ +| Vlad Serebrennikov +| serebrennikov.vladislav\@gmail.com (email), Endilll (GitHub), Endill (Discord), Endill (Discourse) + + Objective-C/C++ conformance ~~~~~~~~~~~~~~~~~~~~~~~~~~~ | John McCall From 88e23eb2cfadbf92b109b0aec999378f0c2a1062 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 29 Oct 2024 06:08:50 -0700 Subject: [PATCH 281/425] DAG: Fix legalization of vector addrspacecasts (#113964) --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 + .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 + llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 24 +- llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 1177 +++++++++++++++++ 4 files changed, 1200 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 47a9ae12248ccba..6ba12cfb8c51481 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -4375,6 +4375,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_TO_SINT, dl, ResVT, RoundNode)); break; } + case ISD::ADDRSPACECAST: + Results.push_back(DAG.UnrollVectorOp(Node)); + break; case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8f255cce1fe15d1..5403d787861d46e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12590,6 +12590,14 @@ SDValue 
SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], getValueType(ExtVT))); + break; + } + case ISD::ADDRSPACECAST: { + const auto *ASC = cast<AddrSpaceCastSDNode>(N); + Scalars.push_back(getAddrSpaceCast(dl, EltVT, Operands[0], + ASC->getSrcAddressSpace(), + ASC->getDestAddressSpace())); + break; } } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 0f65df0763cc834..e4b54c7d72b0835 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -512,18 +512,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, for (MVT VT : VectorIntTypes) { // Expand the following operations for the current type by default. - setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT, - ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU, - ISD::MULHS, ISD::OR, ISD::SHL, - ISD::SRA, ISD::SRL, ISD::ROTL, - ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP, - ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV, - ISD::SREM, ISD::UREM, ISD::SMUL_LOHI, - ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM, - ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC, - ISD::XOR, ISD::BSWAP, ISD::CTPOP, - ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE, - ISD::SETCC}, + setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT, + ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU, + ISD::MULHS, ISD::OR, ISD::SHL, + ISD::SRA, ISD::SRL, ISD::ROTL, + ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP, + ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV, + ISD::SREM, ISD::UREM, ISD::SMUL_LOHI, + ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM, + ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC, + ISD::XOR, ISD::BSWAP, ISD::CTPOP, + ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE, + ISD::SETCC, ISD::ADDRSPACECAST}, VT, Expand); } diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index 7336543b41cbc8c..236956c1829e77b 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ 
b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -409,6 +409,1183 @@ define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspa ret void } +define <2 x ptr addrspace(5)> @addrspacecast_v2p0_to_v2p5(<2 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v2p0_to_v2p5: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(5)> + ret <2 x ptr addrspace(5)> %cast +} + +define <3 x ptr addrspace(5)> @addrspacecast_v3p0_to_v3p5(<3 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v3p0_to_v3p5: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(5)> + ret <3 x ptr addrspace(5)> %cast +} + +define <4 x ptr addrspace(5)> @addrspacecast_v4p0_to_v4p5(<4 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v4p0_to_v4p5: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] +; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(5)> + ret <4 x ptr addrspace(5)> %cast +} + +define <8 x ptr 
addrspace(5)> @addrspacecast_v8p0_to_v8p5(<8 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v8p0_to_v8p5: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] +; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] +; HSA-NEXT: v_cndmask_b32_e32 v4, -1, v8, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; HSA-NEXT: v_cndmask_b32_e32 v5, -1, v10, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] +; HSA-NEXT: v_cndmask_b32_e32 v6, -1, v12, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] +; HSA-NEXT: v_cndmask_b32_e32 v7, -1, v14, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(5)> + ret <8 x ptr addrspace(5)> %cast +} + +define <16 x ptr addrspace(5)> @addrspacecast_v16p0_to_v16p5(<16 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v16p0_to_v16p5: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[24:25] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[26:27] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; HSA-NEXT: v_cmp_ne_u64_e64 s[8:9], 0, v[28:29] +; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] +; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] +; HSA-NEXT: v_cndmask_b32_e32 v4, -1, v8, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; HSA-NEXT: v_cndmask_b32_e32 v5, -1, 
v10, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] +; HSA-NEXT: v_cndmask_b32_e64 v13, -1, v26, s[6:7] +; HSA-NEXT: v_cndmask_b32_e32 v6, -1, v12, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] +; HSA-NEXT: v_cndmask_b32_e64 v12, -1, v24, s[4:5] +; HSA-NEXT: v_cndmask_b32_e32 v7, -1, v14, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17] +; HSA-NEXT: v_cndmask_b32_e64 v14, -1, v28, s[8:9] +; HSA-NEXT: v_cndmask_b32_e32 v8, -1, v16, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19] +; HSA-NEXT: v_cndmask_b32_e32 v9, -1, v18, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21] +; HSA-NEXT: v_cndmask_b32_e32 v10, -1, v20, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[22:23] +; HSA-NEXT: v_cndmask_b32_e32 v11, -1, v22, vcc +; HSA-NEXT: s_waitcnt vmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[30:31] +; HSA-NEXT: v_cndmask_b32_e32 v15, -1, v30, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(5)> + ret <16 x ptr addrspace(5)> %cast +} + +define <2 x ptr> @addrspacecast_v2p5_to_v2p0(<2 x ptr addrspace(5)> %ptr) { +; CI-LABEL: addrspacecast_v2p5_to_v2p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x11 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v3, s4 +; CI-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; CI-NEXT: v_mov_b32_e32 v1, v4 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v2p5_to_v2p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_private_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 
vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <2 x ptr addrspace(5)> %ptr to <2 x ptr> + ret <2 x ptr> %cast +} + +define <3 x ptr> @addrspacecast_v3p5_to_v3p0(<3 x ptr addrspace(5)> %ptr) { +; CI-LABEL: addrspacecast_v3p5_to_v3p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x11 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v5, s4 +; CI-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; CI-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; CI-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc +; CI-NEXT: v_mov_b32_e32 v1, v7 +; CI-NEXT: v_mov_b32_e32 v2, v6 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v3p5_to_v3p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_private_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <3 x ptr addrspace(5)> %ptr to <3 x ptr> + ret <3 x ptr> %cast +} + +define <4 x ptr> @addrspacecast_v4p5_to_v4p0(<4 x ptr addrspace(5)> %ptr) { +; CI-LABEL: addrspacecast_v4p5_to_v4p0: +; CI: ; %bb.0: 
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x11 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v7, s4 +; CI-NEXT: v_cndmask_b32_e32 v10, 0, v7, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v8, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; CI-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; CI-NEXT: v_cndmask_b32_e32 v5, 0, v7, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; CI-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; CI-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc +; CI-NEXT: v_mov_b32_e32 v1, v10 +; CI-NEXT: v_mov_b32_e32 v2, v8 +; CI-NEXT: v_mov_b32_e32 v3, v9 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v4p5_to_v4p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_private_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v7, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, v10 +; GFX9-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-NEXT: v_mov_b32_e32 v3, v9 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <4 x ptr addrspace(5)> %ptr to <4 x ptr> + ret <4 x ptr> %cast +} + +define <8 x ptr> @addrspacecast_v8p5_to_v8p0(<8 x ptr addrspace(5)> %ptr) { +; CI-LABEL: addrspacecast_v8p5_to_v8p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: 
s_load_dword s4, s[6:7], 0x11 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v15, s4 +; CI-NEXT: v_cndmask_b32_e32 v22, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v16, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v17, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; CI-NEXT: v_cndmask_b32_e32 v18, 0, v2, vcc +; CI-NEXT: v_cndmask_b32_e32 v19, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; CI-NEXT: v_cndmask_b32_e32 v20, 0, v3, vcc +; CI-NEXT: v_cndmask_b32_e32 v21, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 +; CI-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc +; CI-NEXT: v_cndmask_b32_e32 v9, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5 +; CI-NEXT: v_cndmask_b32_e32 v10, 0, v5, vcc +; CI-NEXT: v_cndmask_b32_e32 v11, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v6 +; CI-NEXT: v_cndmask_b32_e32 v12, 0, v6, vcc +; CI-NEXT: v_cndmask_b32_e32 v13, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v7 +; CI-NEXT: v_cndmask_b32_e32 v14, 0, v7, vcc +; CI-NEXT: v_cndmask_b32_e32 v15, 0, v15, vcc +; CI-NEXT: v_mov_b32_e32 v1, v22 +; CI-NEXT: v_mov_b32_e32 v2, v16 +; CI-NEXT: v_mov_b32_e32 v3, v17 +; CI-NEXT: v_mov_b32_e32 v4, v18 +; CI-NEXT: v_mov_b32_e32 v5, v19 +; CI-NEXT: v_mov_b32_e32 v6, v20 +; CI-NEXT: v_mov_b32_e32 v7, v21 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v8p5_to_v8p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_private_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v15, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v22, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v18, 
0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v20, 0, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v21, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v14, 0, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v15, 0, v15, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, v22 +; GFX9-NEXT: v_mov_b32_e32 v2, v16 +; GFX9-NEXT: v_mov_b32_e32 v3, v17 +; GFX9-NEXT: v_mov_b32_e32 v4, v18 +; GFX9-NEXT: v_mov_b32_e32 v5, v19 +; GFX9-NEXT: v_mov_b32_e32 v6, v20 +; GFX9-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <8 x ptr addrspace(5)> %ptr to <8 x ptr> + ret <8 x ptr> %cast +} + +define <16 x ptr> @addrspacecast_v16p5_to_v16p0(<16 x ptr addrspace(5)> %ptr) { +; CI-LABEL: addrspacecast_v16p5_to_v16p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x11 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: v_cmp_ne_u32_e64 s[6:7], -1, v6 +; CI-NEXT: v_cmp_ne_u32_e64 s[8:9], -1, v7 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v31, s4 +; CI-NEXT: v_cndmask_b32_e32 v48, 0, v31, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v35, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v33, 0, v31, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; CI-NEXT: v_cndmask_b32_e32 v36, 0, v2, vcc +; CI-NEXT: v_cndmask_b32_e32 v49, 0, v31, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; CI-NEXT: v_cndmask_b32_e32 
v37, 0, v3, vcc +; CI-NEXT: v_cndmask_b32_e32 v34, 0, v31, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 +; CI-NEXT: v_cmp_ne_u32_e64 s[4:5], -1, v5 +; CI-NEXT: v_cndmask_b32_e32 v38, 0, v4, vcc +; CI-NEXT: v_cndmask_b32_e64 v50, 0, v5, s[4:5] +; CI-NEXT: v_cndmask_b32_e64 v39, 0, v6, s[6:7] +; CI-NEXT: v_cndmask_b32_e64 v32, 0, v7, s[8:9] +; CI-NEXT: v_cmp_ne_u32_e64 s[10:11], -1, v8 +; CI-NEXT: v_cmp_ne_u32_e64 s[12:13], -1, v9 +; CI-NEXT: v_cmp_ne_u32_e64 s[14:15], -1, v10 +; CI-NEXT: v_cmp_ne_u32_e64 s[16:17], -1, v11 +; CI-NEXT: v_cmp_ne_u32_e64 s[18:19], -1, v12 +; CI-NEXT: v_cmp_ne_u32_e64 s[20:21], -1, v13 +; CI-NEXT: v_cmp_ne_u32_e64 s[22:23], -1, v14 +; CI-NEXT: v_cmp_ne_u32_e64 s[24:25], -1, v15 +; CI-NEXT: v_cndmask_b32_e64 v16, 0, v8, s[10:11] +; CI-NEXT: v_cndmask_b32_e64 v18, 0, v9, s[12:13] +; CI-NEXT: v_cndmask_b32_e64 v20, 0, v10, s[14:15] +; CI-NEXT: v_cndmask_b32_e64 v22, 0, v11, s[16:17] +; CI-NEXT: v_cndmask_b32_e64 v24, 0, v12, s[18:19] +; CI-NEXT: v_cndmask_b32_e64 v26, 0, v13, s[20:21] +; CI-NEXT: v_cndmask_b32_e64 v28, 0, v14, s[22:23] +; CI-NEXT: v_cndmask_b32_e64 v30, 0, v15, s[24:25] +; CI-NEXT: v_cndmask_b32_e32 v9, 0, v31, vcc +; CI-NEXT: v_cndmask_b32_e64 v11, 0, v31, s[4:5] +; CI-NEXT: v_cndmask_b32_e64 v13, 0, v31, s[6:7] +; CI-NEXT: v_cndmask_b32_e64 v15, 0, v31, s[8:9] +; CI-NEXT: v_cndmask_b32_e64 v17, 0, v31, s[10:11] +; CI-NEXT: v_cndmask_b32_e64 v19, 0, v31, s[12:13] +; CI-NEXT: v_cndmask_b32_e64 v21, 0, v31, s[14:15] +; CI-NEXT: v_cndmask_b32_e64 v23, 0, v31, s[16:17] +; CI-NEXT: v_cndmask_b32_e64 v25, 0, v31, s[18:19] +; CI-NEXT: v_cndmask_b32_e64 v27, 0, v31, s[20:21] +; CI-NEXT: v_cndmask_b32_e64 v29, 0, v31, s[22:23] +; CI-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[24:25] +; CI-NEXT: v_mov_b32_e32 v1, v48 +; CI-NEXT: v_mov_b32_e32 v2, v35 +; CI-NEXT: v_mov_b32_e32 v3, v33 +; CI-NEXT: v_mov_b32_e32 v4, v36 +; CI-NEXT: v_mov_b32_e32 v5, v49 +; CI-NEXT: v_mov_b32_e32 v6, v37 +; CI-NEXT: v_mov_b32_e32 v7, v34 +; CI-NEXT: 
v_mov_b32_e32 v8, v38 +; CI-NEXT: v_mov_b32_e32 v10, v50 +; CI-NEXT: v_mov_b32_e32 v12, v39 +; CI-NEXT: v_mov_b32_e32 v14, v32 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v16p5_to_v16p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_private_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v31, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v48, 0, v31, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v35, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v33, 0, v31, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v36, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v49, 0, v31, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v37, 0, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v34, 0, v31, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], -1, v5 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], -1, v6 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[8:9], -1, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v38, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v5, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v39, 0, v6, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v32, 0, v7, s[8:9] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[10:11], -1, v8 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[12:13], -1, v9 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[14:15], -1, v10 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[16:17], -1, v11 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[18:19], -1, v12 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[20:21], -1, v13 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[22:23], -1, v14 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[24:25], -1, v15 +; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v8, s[10:11] +; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v9, s[12:13] +; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, v10, s[14:15] +; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, v11, s[16:17] +; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, v12, s[18:19] +; GFX9-NEXT: v_cndmask_b32_e64 
v26, 0, v13, s[20:21] +; GFX9-NEXT: v_cndmask_b32_e64 v28, 0, v14, s[22:23] +; GFX9-NEXT: v_cndmask_b32_e64 v30, 0, v15, s[24:25] +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v31, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, v31, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, v31, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v15, 0, v31, s[8:9] +; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, v31, s[10:11] +; GFX9-NEXT: v_cndmask_b32_e64 v19, 0, v31, s[12:13] +; GFX9-NEXT: v_cndmask_b32_e64 v21, 0, v31, s[14:15] +; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, v31, s[16:17] +; GFX9-NEXT: v_cndmask_b32_e64 v25, 0, v31, s[18:19] +; GFX9-NEXT: v_cndmask_b32_e64 v27, 0, v31, s[20:21] +; GFX9-NEXT: v_cndmask_b32_e64 v29, 0, v31, s[22:23] +; GFX9-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[24:25] +; GFX9-NEXT: v_mov_b32_e32 v1, v48 +; GFX9-NEXT: v_mov_b32_e32 v2, v35 +; GFX9-NEXT: v_mov_b32_e32 v3, v33 +; GFX9-NEXT: v_mov_b32_e32 v4, v36 +; GFX9-NEXT: v_mov_b32_e32 v5, v49 +; GFX9-NEXT: v_mov_b32_e32 v6, v37 +; GFX9-NEXT: v_mov_b32_e32 v7, v34 +; GFX9-NEXT: v_mov_b32_e32 v8, v38 +; GFX9-NEXT: v_mov_b32_e32 v10, v50 +; GFX9-NEXT: v_mov_b32_e32 v12, v39 +; GFX9-NEXT: v_mov_b32_e32 v14, v32 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <16 x ptr addrspace(5)> %ptr to <16 x ptr> + ret <16 x ptr> %cast +} + +define <2 x ptr addrspace(3)> @addrspacecast_v2p0_to_v2p3(<2 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v2p0_to_v2p3: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(3)> + ret <2 x ptr addrspace(3)> %cast +} + +define <3 x ptr addrspace(3)> @addrspacecast_v3p0_to_v3p3(<3 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v3p0_to_v3p3: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(3)> + ret <3 x ptr addrspace(3)> %cast +} + +define <4 x ptr addrspace(3)> @addrspacecast_v4p0_to_v4p3(<4 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v4p0_to_v4p3: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] +; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(3)> + ret <4 x ptr addrspace(3)> %cast +} + +define <8 x ptr addrspace(3)> @addrspacecast_v8p0_to_v8p3(<8 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v8p0_to_v8p3: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] +; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] +; HSA-NEXT: v_cndmask_b32_e32 v4, -1, v8, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; HSA-NEXT: v_cndmask_b32_e32 v5, -1, v10, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] +; HSA-NEXT: v_cndmask_b32_e32 v6, -1, v12, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] +; 
HSA-NEXT: v_cndmask_b32_e32 v7, -1, v14, vcc +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(3)> + ret <8 x ptr addrspace(3)> %cast +} + +define <16 x ptr addrspace(3)> @addrspacecast_v16p0_to_v16p3(<16 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v16p0_to_v16p3: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; HSA-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[24:25] +; HSA-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; HSA-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[26:27] +; HSA-NEXT: v_cndmask_b32_e32 v1, -1, v2, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; HSA-NEXT: v_cmp_ne_u64_e64 s[8:9], 0, v[28:29] +; HSA-NEXT: v_cndmask_b32_e32 v2, -1, v4, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7] +; HSA-NEXT: v_cndmask_b32_e32 v3, -1, v6, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9] +; HSA-NEXT: v_cndmask_b32_e32 v4, -1, v8, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11] +; HSA-NEXT: v_cndmask_b32_e32 v5, -1, v10, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13] +; HSA-NEXT: v_cndmask_b32_e64 v13, -1, v26, s[6:7] +; HSA-NEXT: v_cndmask_b32_e32 v6, -1, v12, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15] +; HSA-NEXT: v_cndmask_b32_e64 v12, -1, v24, s[4:5] +; HSA-NEXT: v_cndmask_b32_e32 v7, -1, v14, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17] +; HSA-NEXT: v_cndmask_b32_e64 v14, -1, v28, s[8:9] +; HSA-NEXT: v_cndmask_b32_e32 v8, -1, v16, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19] +; HSA-NEXT: v_cndmask_b32_e32 v9, -1, v18, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21] +; HSA-NEXT: v_cndmask_b32_e32 v10, -1, v20, vcc +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[22:23] +; HSA-NEXT: v_cndmask_b32_e32 v11, -1, v22, vcc +; HSA-NEXT: s_waitcnt vmcnt(0) +; HSA-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[30:31] +; HSA-NEXT: v_cndmask_b32_e32 v15, -1, v30, vcc +; 
HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(3)> + ret <16 x ptr addrspace(3)> %cast +} + +define <2 x ptr> @addrspacecast_v2p3_to_v2p0(<2 x ptr addrspace(3)> %ptr) { +; CI-LABEL: addrspacecast_v2p3_to_v2p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x10 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v3, s4 +; CI-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; CI-NEXT: v_mov_b32_e32 v1, v4 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v2p3_to_v2p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v3, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <2 x ptr addrspace(3)> %ptr to <2 x ptr> + ret <2 x ptr> %cast +} + +define <3 x ptr> @addrspacecast_v3p3_to_v3p0(<3 x ptr addrspace(3)> %ptr) { +; CI-LABEL: addrspacecast_v3p3_to_v3p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x10 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v5, s4 +; CI-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; 
CI-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; CI-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc +; CI-NEXT: v_mov_b32_e32 v1, v7 +; CI-NEXT: v_mov_b32_e32 v2, v6 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v3p3_to_v3p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v5, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <3 x ptr addrspace(3)> %ptr to <3 x ptr> + ret <3 x ptr> %cast +} + +define <4 x ptr> @addrspacecast_v4p3_to_v4p0(<4 x ptr addrspace(3)> %ptr) { +; CI-LABEL: addrspacecast_v4p3_to_v4p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x10 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v7, s4 +; CI-NEXT: v_cndmask_b32_e32 v10, 0, v7, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v8, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; CI-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; CI-NEXT: v_cndmask_b32_e32 v5, 0, v7, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; CI-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; CI-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc +; CI-NEXT: v_mov_b32_e32 v1, v10 +; CI-NEXT: v_mov_b32_e32 v2, v8 +; CI-NEXT: v_mov_b32_e32 v3, v9 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v4p3_to_v4p0: +; 
GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v7, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v7, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, v10 +; GFX9-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-NEXT: v_mov_b32_e32 v3, v9 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <4 x ptr addrspace(3)> %ptr to <4 x ptr> + ret <4 x ptr> %cast +} + +define <8 x ptr> @addrspacecast_v8p3_to_v8p0(<8 x ptr addrspace(3)> %ptr) { +; CI-LABEL: addrspacecast_v8p3_to_v8p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x10 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v15, s4 +; CI-NEXT: v_cndmask_b32_e32 v22, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v16, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v17, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; CI-NEXT: v_cndmask_b32_e32 v18, 0, v2, vcc +; CI-NEXT: v_cndmask_b32_e32 v19, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; CI-NEXT: v_cndmask_b32_e32 v20, 0, v3, vcc +; CI-NEXT: v_cndmask_b32_e32 v21, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 +; CI-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc +; CI-NEXT: v_cndmask_b32_e32 v9, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5 +; CI-NEXT: v_cndmask_b32_e32 v10, 0, v5, vcc +; CI-NEXT: 
v_cndmask_b32_e32 v11, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v6 +; CI-NEXT: v_cndmask_b32_e32 v12, 0, v6, vcc +; CI-NEXT: v_cndmask_b32_e32 v13, 0, v15, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v7 +; CI-NEXT: v_cndmask_b32_e32 v14, 0, v7, vcc +; CI-NEXT: v_cndmask_b32_e32 v15, 0, v15, vcc +; CI-NEXT: v_mov_b32_e32 v1, v22 +; CI-NEXT: v_mov_b32_e32 v2, v16 +; CI-NEXT: v_mov_b32_e32 v3, v17 +; CI-NEXT: v_mov_b32_e32 v4, v18 +; CI-NEXT: v_mov_b32_e32 v5, v19 +; CI-NEXT: v_mov_b32_e32 v6, v20 +; CI-NEXT: v_mov_b32_e32 v7, v21 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v8p3_to_v8p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v15, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v22, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v20, 0, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v21, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 +; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v5 +; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v6 +; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v6, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v15, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v14, 0, v7, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v15, 0, v15, vcc +; GFX9-NEXT: v_mov_b32_e32 v1, v22 +; GFX9-NEXT: v_mov_b32_e32 v2, v16 +; GFX9-NEXT: v_mov_b32_e32 v3, 
v17 +; GFX9-NEXT: v_mov_b32_e32 v4, v18 +; GFX9-NEXT: v_mov_b32_e32 v5, v19 +; GFX9-NEXT: v_mov_b32_e32 v6, v20 +; GFX9-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <8 x ptr addrspace(3)> %ptr to <8 x ptr> + ret <8 x ptr> %cast +} + +define <16 x ptr> @addrspacecast_v16p3_to_v16p0(<16 x ptr addrspace(3)> %ptr) { +; CI-LABEL: addrspacecast_v16p3_to_v16p0: +; CI: ; %bb.0: +; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CI-NEXT: s_load_dword s4, s[6:7], 0x10 +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; CI-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; CI-NEXT: v_cmp_ne_u32_e64 s[6:7], -1, v6 +; CI-NEXT: v_cmp_ne_u32_e64 s[8:9], -1, v7 +; CI-NEXT: s_waitcnt lgkmcnt(0) +; CI-NEXT: v_mov_b32_e32 v31, s4 +; CI-NEXT: v_cndmask_b32_e32 v48, 0, v31, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; CI-NEXT: v_cndmask_b32_e32 v35, 0, v1, vcc +; CI-NEXT: v_cndmask_b32_e32 v33, 0, v31, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; CI-NEXT: v_cndmask_b32_e32 v36, 0, v2, vcc +; CI-NEXT: v_cndmask_b32_e32 v49, 0, v31, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; CI-NEXT: v_cndmask_b32_e32 v37, 0, v3, vcc +; CI-NEXT: v_cndmask_b32_e32 v34, 0, v31, vcc +; CI-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 +; CI-NEXT: v_cmp_ne_u32_e64 s[4:5], -1, v5 +; CI-NEXT: v_cndmask_b32_e32 v38, 0, v4, vcc +; CI-NEXT: v_cndmask_b32_e64 v50, 0, v5, s[4:5] +; CI-NEXT: v_cndmask_b32_e64 v39, 0, v6, s[6:7] +; CI-NEXT: v_cndmask_b32_e64 v32, 0, v7, s[8:9] +; CI-NEXT: v_cmp_ne_u32_e64 s[10:11], -1, v8 +; CI-NEXT: v_cmp_ne_u32_e64 s[12:13], -1, v9 +; CI-NEXT: v_cmp_ne_u32_e64 s[14:15], -1, v10 +; CI-NEXT: v_cmp_ne_u32_e64 s[16:17], -1, v11 +; CI-NEXT: v_cmp_ne_u32_e64 s[18:19], -1, v12 +; CI-NEXT: v_cmp_ne_u32_e64 s[20:21], -1, v13 +; CI-NEXT: v_cmp_ne_u32_e64 s[22:23], -1, v14 +; CI-NEXT: v_cmp_ne_u32_e64 s[24:25], -1, v15 +; CI-NEXT: v_cndmask_b32_e64 v16, 0, v8, s[10:11] +; CI-NEXT: v_cndmask_b32_e64 v18, 0, v9, s[12:13] +; CI-NEXT: v_cndmask_b32_e64 v20, 0, v10, 
s[14:15] +; CI-NEXT: v_cndmask_b32_e64 v22, 0, v11, s[16:17] +; CI-NEXT: v_cndmask_b32_e64 v24, 0, v12, s[18:19] +; CI-NEXT: v_cndmask_b32_e64 v26, 0, v13, s[20:21] +; CI-NEXT: v_cndmask_b32_e64 v28, 0, v14, s[22:23] +; CI-NEXT: v_cndmask_b32_e64 v30, 0, v15, s[24:25] +; CI-NEXT: v_cndmask_b32_e32 v9, 0, v31, vcc +; CI-NEXT: v_cndmask_b32_e64 v11, 0, v31, s[4:5] +; CI-NEXT: v_cndmask_b32_e64 v13, 0, v31, s[6:7] +; CI-NEXT: v_cndmask_b32_e64 v15, 0, v31, s[8:9] +; CI-NEXT: v_cndmask_b32_e64 v17, 0, v31, s[10:11] +; CI-NEXT: v_cndmask_b32_e64 v19, 0, v31, s[12:13] +; CI-NEXT: v_cndmask_b32_e64 v21, 0, v31, s[14:15] +; CI-NEXT: v_cndmask_b32_e64 v23, 0, v31, s[16:17] +; CI-NEXT: v_cndmask_b32_e64 v25, 0, v31, s[18:19] +; CI-NEXT: v_cndmask_b32_e64 v27, 0, v31, s[20:21] +; CI-NEXT: v_cndmask_b32_e64 v29, 0, v31, s[22:23] +; CI-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[24:25] +; CI-NEXT: v_mov_b32_e32 v1, v48 +; CI-NEXT: v_mov_b32_e32 v2, v35 +; CI-NEXT: v_mov_b32_e32 v3, v33 +; CI-NEXT: v_mov_b32_e32 v4, v36 +; CI-NEXT: v_mov_b32_e32 v5, v49 +; CI-NEXT: v_mov_b32_e32 v6, v37 +; CI-NEXT: v_mov_b32_e32 v7, v34 +; CI-NEXT: v_mov_b32_e32 v8, v38 +; CI-NEXT: v_mov_b32_e32 v10, v50 +; CI-NEXT: v_mov_b32_e32 v12, v39 +; CI-NEXT: v_mov_b32_e32 v14, v32 +; CI-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: addrspacecast_v16p3_to_v16p0: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0 +; GFX9-NEXT: v_mov_b32_e32 v31, s5 +; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v48, 0, v31, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v35, 0, v1, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v33, 0, v31, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v2 +; GFX9-NEXT: v_cndmask_b32_e32 v36, 0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v49, 0, v31, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v3 +; GFX9-NEXT: v_cndmask_b32_e32 v37, 0, 
v3, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v34, 0, v31, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, -1, v4 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], -1, v5 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], -1, v6 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[8:9], -1, v7 +; GFX9-NEXT: v_cndmask_b32_e32 v38, 0, v4, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v50, 0, v5, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v39, 0, v6, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v32, 0, v7, s[8:9] +; GFX9-NEXT: v_cmp_ne_u32_e64 s[10:11], -1, v8 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[12:13], -1, v9 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[14:15], -1, v10 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[16:17], -1, v11 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[18:19], -1, v12 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[20:21], -1, v13 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[22:23], -1, v14 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[24:25], -1, v15 +; GFX9-NEXT: v_cndmask_b32_e64 v16, 0, v8, s[10:11] +; GFX9-NEXT: v_cndmask_b32_e64 v18, 0, v9, s[12:13] +; GFX9-NEXT: v_cndmask_b32_e64 v20, 0, v10, s[14:15] +; GFX9-NEXT: v_cndmask_b32_e64 v22, 0, v11, s[16:17] +; GFX9-NEXT: v_cndmask_b32_e64 v24, 0, v12, s[18:19] +; GFX9-NEXT: v_cndmask_b32_e64 v26, 0, v13, s[20:21] +; GFX9-NEXT: v_cndmask_b32_e64 v28, 0, v14, s[22:23] +; GFX9-NEXT: v_cndmask_b32_e64 v30, 0, v15, s[24:25] +; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v31, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, v31, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, v31, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e64 v15, 0, v31, s[8:9] +; GFX9-NEXT: v_cndmask_b32_e64 v17, 0, v31, s[10:11] +; GFX9-NEXT: v_cndmask_b32_e64 v19, 0, v31, s[12:13] +; GFX9-NEXT: v_cndmask_b32_e64 v21, 0, v31, s[14:15] +; GFX9-NEXT: v_cndmask_b32_e64 v23, 0, v31, s[16:17] +; GFX9-NEXT: v_cndmask_b32_e64 v25, 0, v31, s[18:19] +; GFX9-NEXT: v_cndmask_b32_e64 v27, 0, v31, s[20:21] +; GFX9-NEXT: v_cndmask_b32_e64 v29, 0, v31, s[22:23] +; GFX9-NEXT: v_cndmask_b32_e64 v31, 0, v31, s[24:25] +; GFX9-NEXT: v_mov_b32_e32 v1, v48 +; GFX9-NEXT: v_mov_b32_e32 v2, v35 +; GFX9-NEXT: v_mov_b32_e32 v3, 
v33 +; GFX9-NEXT: v_mov_b32_e32 v4, v36 +; GFX9-NEXT: v_mov_b32_e32 v5, v49 +; GFX9-NEXT: v_mov_b32_e32 v6, v37 +; GFX9-NEXT: v_mov_b32_e32 v7, v34 +; GFX9-NEXT: v_mov_b32_e32 v8, v38 +; GFX9-NEXT: v_mov_b32_e32 v10, v50 +; GFX9-NEXT: v_mov_b32_e32 v12, v39 +; GFX9-NEXT: v_mov_b32_e32 v14, v32 +; GFX9-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <16 x ptr addrspace(3)> %ptr to <16 x ptr> + ret <16 x ptr> %cast +} + +define <2 x ptr addrspace(1)> @addrspacecast_v2p0_to_v2p1(<2 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v2p0_to_v2p1: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(1)> + ret <2 x ptr addrspace(1)> %cast +} + +define <3 x ptr addrspace(1)> @addrspacecast_v3p0_to_v3p1(<3 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v3p0_to_v3p1: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(1)> + ret <3 x ptr addrspace(1)> %cast +} + +define <4 x ptr addrspace(1)> @addrspacecast_v4p0_to_v4p1(<4 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v4p0_to_v4p1: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(1)> + ret <4 x ptr addrspace(1)> %cast +} + +define <8 x ptr addrspace(1)> @addrspacecast_v8p0_to_v8p1(<8 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v8p0_to_v8p1: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(1)> + ret <8 x ptr addrspace(1)> %cast +} + +define <16 x ptr addrspace(1)> @addrspacecast_v16p0_to_v16p1(<16 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v16p0_to_v16p1: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; HSA-NEXT: 
s_waitcnt vmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(1)> + ret <16 x ptr addrspace(1)> %cast +} + +define <2 x ptr> @addrspacecast_v2p1_to_v2p0(<2 x ptr addrspace(1)> %ptr) { +; HSA-LABEL: addrspacecast_v2p1_to_v2p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <2 x ptr addrspace(1)> %ptr to <2 x ptr> + ret <2 x ptr> %cast +} + +define <1 x ptr> @addrspacecast_v1p1_to_v1p0(<1 x ptr addrspace(1)> %ptr) { +; HSA-LABEL: addrspacecast_v1p1_to_v1p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <1 x ptr addrspace(1)> %ptr to <1 x ptr> + ret <1 x ptr> %cast +} + +define <4 x ptr> @addrspacecast_v4p1_to_v4p0(<4 x ptr addrspace(1)> %ptr) { +; HSA-LABEL: addrspacecast_v4p1_to_v4p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <4 x ptr addrspace(1)> %ptr to <4 x ptr> + ret <4 x ptr> %cast +} + +define <8 x ptr> @addrspacecast_v8p1_to_v8p0(<8 x ptr addrspace(1)> %ptr) { +; HSA-LABEL: addrspacecast_v8p1_to_v8p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <8 x ptr addrspace(1)> %ptr to <8 x ptr> + ret <8 x ptr> %cast +} + +define <16 x ptr> @addrspacecast_v16p1_to_v16p0(<16 x ptr addrspace(1)> %ptr) { +; HSA-LABEL: addrspacecast_v16p1_to_v16p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 +; HSA-NEXT: s_waitcnt vmcnt(0) +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <16 x ptr addrspace(1)> %ptr to <16 x ptr> + ret <16 x ptr> %cast +} + +define <2 x ptr addrspace(6)> @addrspacecast_v2p0_to_v2p6(<2 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v2p0_to_v2p6: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v1, v2 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(6)> + ret <2 x ptr addrspace(6)> %cast +} + +define <3 x ptr addrspace(6)> @addrspacecast_v3p0_to_v3p6(<3 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v3p0_to_v3p6: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v1, v2 +; HSA-NEXT: v_mov_b32_e32 v2, v4 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(6)> + ret <3 x ptr addrspace(6)> %cast +} + +define <4 x ptr addrspace(6)> @addrspacecast_v4p0_to_v4p6(<4 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v4p0_to_v4p6: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v3, v6 +; HSA-NEXT: v_mov_b32_e32 v1, v2 +; HSA-NEXT: v_mov_b32_e32 v2, v4 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(6)> + ret <4 x ptr addrspace(6)> %cast +} + +define <8 x ptr addrspace(6)> @addrspacecast_v8p0_to_v8p6(<8 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v8p0_to_v8p6: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v7, v14 +; HSA-NEXT: v_mov_b32_e32 v5, v10 +; HSA-NEXT: v_mov_b32_e32 v3, v6 +; HSA-NEXT: v_mov_b32_e32 v1, v2 +; HSA-NEXT: v_mov_b32_e32 v2, v4 +; HSA-NEXT: v_mov_b32_e32 v4, v8 +; HSA-NEXT: v_mov_b32_e32 v6, v12 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(6)> + ret <8 x ptr addrspace(6)> %cast +} + +define <16 x ptr addrspace(6)> @addrspacecast_v16p0_to_v16p6(<16 x ptr> %ptr) { +; HSA-LABEL: addrspacecast_v16p0_to_v16p6: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v15, v30 +; HSA-NEXT: v_mov_b32_e32 v13, v26 +; HSA-NEXT: v_mov_b32_e32 v11, v22 +; HSA-NEXT: v_mov_b32_e32 v9, v18 +; HSA-NEXT: v_mov_b32_e32 v7, v14 +; HSA-NEXT: v_mov_b32_e32 v5, v10 +; 
HSA-NEXT: v_mov_b32_e32 v3, v6 +; HSA-NEXT: v_mov_b32_e32 v1, v2 +; HSA-NEXT: v_mov_b32_e32 v2, v4 +; HSA-NEXT: v_mov_b32_e32 v4, v8 +; HSA-NEXT: v_mov_b32_e32 v6, v12 +; HSA-NEXT: v_mov_b32_e32 v8, v16 +; HSA-NEXT: v_mov_b32_e32 v10, v20 +; HSA-NEXT: v_mov_b32_e32 v12, v24 +; HSA-NEXT: v_mov_b32_e32 v14, v28 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(6)> + ret <16 x ptr addrspace(6)> %cast +} + +define <2 x ptr> @addrspacecast_v2p6_to_v2p0(<2 x ptr addrspace(6)> %ptr) { +; HSA-LABEL: addrspacecast_v2p6_to_v2p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v2, v1 +; HSA-NEXT: v_mov_b32_e32 v1, 0 +; HSA-NEXT: v_mov_b32_e32 v3, 0 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <2 x ptr addrspace(6)> %ptr to <2 x ptr> + ret <2 x ptr> %cast +} + +define <1 x ptr> @addrspacecast_v1p6_to_v1p0(<1 x ptr addrspace(6)> %ptr) { +; HSA-LABEL: addrspacecast_v1p6_to_v1p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v1, 0 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <1 x ptr addrspace(6)> %ptr to <1 x ptr> + ret <1 x ptr> %cast +} + +define <4 x ptr> @addrspacecast_v4p6_to_v4p0(<4 x ptr addrspace(6)> %ptr) { +; HSA-LABEL: addrspacecast_v4p6_to_v4p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v6, v3 +; HSA-NEXT: v_mov_b32_e32 v4, v2 +; HSA-NEXT: v_mov_b32_e32 v2, v1 +; HSA-NEXT: v_mov_b32_e32 v1, 0 +; HSA-NEXT: v_mov_b32_e32 v3, 0 +; HSA-NEXT: v_mov_b32_e32 v5, 0 +; HSA-NEXT: v_mov_b32_e32 v7, 0 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <4 x ptr addrspace(6)> %ptr to <4 x ptr> + ret <4 x ptr> %cast +} + +define <8 x ptr> @addrspacecast_v8p6_to_v8p0(<8 x ptr addrspace(6)> %ptr) { +; HSA-LABEL: addrspacecast_v8p6_to_v8p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v14, v7 +; 
HSA-NEXT: v_mov_b32_e32 v12, v6 +; HSA-NEXT: v_mov_b32_e32 v10, v5 +; HSA-NEXT: v_mov_b32_e32 v8, v4 +; HSA-NEXT: v_mov_b32_e32 v6, v3 +; HSA-NEXT: v_mov_b32_e32 v4, v2 +; HSA-NEXT: v_mov_b32_e32 v2, v1 +; HSA-NEXT: v_mov_b32_e32 v1, 0 +; HSA-NEXT: v_mov_b32_e32 v3, 0 +; HSA-NEXT: v_mov_b32_e32 v5, 0 +; HSA-NEXT: v_mov_b32_e32 v7, 0 +; HSA-NEXT: v_mov_b32_e32 v9, 0 +; HSA-NEXT: v_mov_b32_e32 v11, 0 +; HSA-NEXT: v_mov_b32_e32 v13, 0 +; HSA-NEXT: v_mov_b32_e32 v15, 0 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <8 x ptr addrspace(6)> %ptr to <8 x ptr> + ret <8 x ptr> %cast +} + +define <16 x ptr> @addrspacecast_v16p6_to_v16p0(<16 x ptr addrspace(6)> %ptr) { +; HSA-LABEL: addrspacecast_v16p6_to_v16p0: +; HSA: ; %bb.0: +; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; HSA-NEXT: v_mov_b32_e32 v30, v15 +; HSA-NEXT: v_mov_b32_e32 v28, v14 +; HSA-NEXT: v_mov_b32_e32 v26, v13 +; HSA-NEXT: v_mov_b32_e32 v24, v12 +; HSA-NEXT: v_mov_b32_e32 v22, v11 +; HSA-NEXT: v_mov_b32_e32 v20, v10 +; HSA-NEXT: v_mov_b32_e32 v18, v9 +; HSA-NEXT: v_mov_b32_e32 v16, v8 +; HSA-NEXT: v_mov_b32_e32 v14, v7 +; HSA-NEXT: v_mov_b32_e32 v12, v6 +; HSA-NEXT: v_mov_b32_e32 v10, v5 +; HSA-NEXT: v_mov_b32_e32 v8, v4 +; HSA-NEXT: v_mov_b32_e32 v6, v3 +; HSA-NEXT: v_mov_b32_e32 v4, v2 +; HSA-NEXT: v_mov_b32_e32 v2, v1 +; HSA-NEXT: v_mov_b32_e32 v1, 0 +; HSA-NEXT: v_mov_b32_e32 v3, 0 +; HSA-NEXT: v_mov_b32_e32 v5, 0 +; HSA-NEXT: v_mov_b32_e32 v7, 0 +; HSA-NEXT: v_mov_b32_e32 v9, 0 +; HSA-NEXT: v_mov_b32_e32 v11, 0 +; HSA-NEXT: v_mov_b32_e32 v13, 0 +; HSA-NEXT: v_mov_b32_e32 v15, 0 +; HSA-NEXT: v_mov_b32_e32 v17, 0 +; HSA-NEXT: v_mov_b32_e32 v19, 0 +; HSA-NEXT: v_mov_b32_e32 v21, 0 +; HSA-NEXT: v_mov_b32_e32 v23, 0 +; HSA-NEXT: v_mov_b32_e32 v25, 0 +; HSA-NEXT: v_mov_b32_e32 v27, 0 +; HSA-NEXT: v_mov_b32_e32 v29, 0 +; HSA-NEXT: v_mov_b32_e32 v31, 0 +; HSA-NEXT: s_setpc_b64 s[30:31] + %cast = addrspacecast <16 x ptr addrspace(6)> %ptr to <16 x ptr> + ret <16 x ptr> %cast +} + declare void 
@llvm.amdgcn.s.barrier() #1 declare i32 @llvm.amdgcn.workitem.id.x() #2 From 183b38eb2261164fdfd6b7deac002edf27a39fe7 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 23 Sep 2024 12:54:02 +0100 Subject: [PATCH 282/425] [libclc] Split off library build system into helpers This splits off several key parts of the build system into utility methods. This will be used in upcoming patches to help provide additional sets of target-specific builtin libraries. Running llvm-diff on the resulting LLVM bytecode binaries, and regular diff on SPIR-V binaries, shows no differences before and after this patch. --- libclc/CMakeLists.txt | 172 ++++------------------ libclc/cmake/modules/AddLibclc.cmake | 205 +++++++++++++++++++++++++++ 2 files changed, 230 insertions(+), 147 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 3d7c3591a556e56..ba04c0bc8618f27 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -278,49 +278,22 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( DARCH ${ARCH} ) endif() - # Enumerate SOURCES* files - set( source_list ) - foreach( l ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} ) - foreach( s "SOURCES" "SOURCES_${LLVM_MAJOR}.${LLVM_MINOR}" ) - file( TO_CMAKE_PATH ${l}/lib/${s} file_loc ) - file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc ) - # Prepend the location to give higher priority to - # specialized implementation - if( EXISTS ${loc} ) - set( source_list ${file_loc} ${source_list} ) - endif() - endforeach() - endforeach() - - # Add the generated convert.cl here to prevent adding the one listed in - # SOURCES - set( objects ) # A "set" of already-added input files - set( rel_files ) # Source directory input files, relative to the root dir - set( gen_files ) # Generated binary input files, relative to the binary dir - if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" ) - if( NOT ENABLE_RUNTIME_SUBNORMAL AND NOT ${ARCH} STREQUAL "clspv" AND - NOT ${ARCH} STREQUAL 
"clspv64" ) - list( APPEND gen_files convert.cl ) - list( APPEND objects convert.cl ) - list( APPEND rel_files generic/lib/subnormal_use_default.ll ) - elseif(${ARCH} STREQUAL "clspv" OR ${ARCH} STREQUAL "clspv64") - list( APPEND gen_files clspv-convert.cl ) - list( APPEND objects clspv-convert.cl ) + set( opencl_lib_files ) + set( opencl_gen_files ) + + if( NOT ARCH STREQUAL spirv AND NOT ARCH STREQUAL spirv64 ) + if( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 ) + list( APPEND opencl_gen_files clspv-convert.cl ) + elseif ( NOT ENABLE_RUNTIME_SUBNORMAL ) + list( APPEND opencl_gen_files convert.cl ) + list( APPEND opencl_lib_files generic/lib/subnormal_use_default.ll ) endif() endif() - foreach( l ${source_list} ) - file( READ ${l} file_list ) - string( REPLACE "\n" ";" file_list ${file_list} ) - get_filename_component( dir ${l} DIRECTORY ) - foreach( f ${file_list} ) - # Only add each file once, so that targets can 'specialize' builtins - if( NOT ${f} IN_LIST objects ) - list( APPEND objects ${f} ) - list( APPEND rel_files ${dir}/${f} ) - endif() - endforeach() - endforeach() + libclc_configure_lib_source( + opencl_lib_files + DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} + ) foreach( d ${${t}_devices} ) get_libclc_device_info( @@ -331,11 +304,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) CLANG_TRIPLE clang_triple ) - set( mcpu ) - if( NOT "${cpu}" STREQUAL "" ) - set( mcpu "-mcpu=${cpu}" ) - endif() - message( STATUS " device: ${d} ( ${${d}_aliases} )" ) if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) @@ -363,109 +331,19 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) -Wno-bitwise-conditional-parentheses ) - set( bytecode_files "" ) - foreach( file IN LISTS gen_files rel_files ) - # We need to take each file and produce an absolute input file, as well - # as a unique architecture-specific output file. We deal with a mix of - # different input files, which makes this trickier. 
- if( ${file} IN_LIST gen_files ) - # Generated files are given just as file names, which we must make - # absolute to the binary directory. - set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} ) - set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" ) - else() - # Other files are originally relative to each SOURCE file, which are - # then make relative to the libclc root directory. We must normalize - # the path (e.g., ironing out any ".."), then make it relative to the - # root directory again, and use that relative path component for the - # binary path. - get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) - file( RELATIVE_PATH root_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${abs_path} ) - set( input_file ${CMAKE_CURRENT_SOURCE_DIR}/${file} ) - set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" ) - endif() - - get_filename_component( file_dir ${file} DIRECTORY ) - - compile_to_bc( - TRIPLE ${clang_triple} - INPUT ${input_file} - OUTPUT ${output_file} - EXTRA_OPTS "${mcpu}" -fno-builtin -nostdlib - "${build_flags}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} - DEPENDENCIES generate_convert.cl clspv-generate_convert.cl - ) - list( APPEND bytecode_files ${output_file} ) - endforeach() - - set( builtins_comp_lib_tgt builtins.comp.${arch_suffix} ) - add_custom_target( ${builtins_comp_lib_tgt} - DEPENDS ${bytecode_files} - ) - set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" ) + if( NOT "${cpu}" STREQUAL "" ) + list( APPEND build_flags -mcpu=${cpu} ) + endif() - set( builtins_link_lib_tgt builtins.link.${arch_suffix} ) - link_bc( - TARGET ${builtins_link_lib_tgt} - INPUTS ${bytecode_files} - DEPENDENCIES ${builtins_comp_lib_tgt} + add_libclc_builtin_set( + ARCH ${ARCH} + ARCH_SUFFIX ${arch_suffix} + TRIPLE ${clang_triple} + COMPILE_FLAGS ${build_flags} + OPT_FLAGS ${opt_flags} + LIB_FILES ${opencl_lib_files} + GEN_FILES ${opencl_gen_files} + ALIASES ${${d}_aliases} ) - - set( 
builtins_link_lib $ ) - - if( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) - set( spv_suffix ${arch_suffix}.spv ) - add_custom_command( OUTPUT ${spv_suffix} - COMMAND ${llvm-spirv_exe} ${spvflags} -o ${spv_suffix} ${builtins_link_lib} - DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt} - ) - add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" ) - set_target_properties( "prepare-${spv_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" ) - install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix} - DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) - else() - set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) - - # Add opt target - add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc - COMMAND ${opt_exe} ${opt_flags} -o ${builtins_opt_lib_tgt}.bc - ${builtins_link_lib} - DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt} - ) - add_custom_target( ${builtins_opt_lib_tgt} - ALL DEPENDS ${builtins_opt_lib_tgt}.bc - ) - set_target_properties( ${builtins_opt_lib_tgt} PROPERTIES - TARGET_FILE ${CMAKE_CURRENT_BINARY_DIR}/${builtins_opt_lib_tgt}.bc - FOLDER "libclc/Device IR/Opt" - ) - - set( builtins_opt_lib $ ) - - # Add prepare target - set( obj_suffix ${arch_suffix}.bc ) - add_custom_command( OUTPUT ${obj_suffix} - COMMAND ${prepare_builtins_exe} -o ${obj_suffix} ${builtins_opt_lib} - DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} ) - add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} ) - set_target_properties( "prepare-${obj_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" ) - - # nvptx-- targets don't include workitem builtins - if( NOT clang_triple MATCHES ".*ptx.*--$" ) - add_test( NAME external-calls-${obj_suffix} - COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) - endif() - - install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION 
"${CMAKE_INSTALL_DATADIR}/clc" ) - foreach( a ${${d}_aliases} ) - set( alias_suffix "${a}-${clang_triple}.bc" ) - add_custom_target( ${alias_suffix} ALL - COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix} - DEPENDS prepare-${obj_suffix} ) - set_target_properties( "${alias_suffix}" PROPERTIES FOLDER "libclc/Device IR/Aliases" ) - install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) - endforeach( a ) - endif() endforeach( d ) endforeach( t ) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index f2032660ba99b0b..147f06bc9a9afc7 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -178,3 +178,208 @@ function(get_libclc_device_info) set( ${ARG_CLANG_TRIPLE} ${ARG_TRIPLE} PARENT_SCOPE ) endif() endfunction() + +# Compiles a list of library source files (provided by LIB_FILES/GEN_FILES) and +# compiles them to LLVM bytecode (or SPIR-V), links them together and optimizes +# them. +# +# For bytecode libraries, a list of ALIASES may optionally be provided to +# produce additional symlinks. +# +# Arguments: +# * ARCH +# libclc architecture being built +# * ARCH_SUFFIX +# libclc architecture/triple suffix +# * TRIPLE +# Triple used to compile +# +# Optional Arguments: +# * LIB_FILES ... +# List of files that should be built for this library +# * GEN_FILES ... +# List of generated files (in build dir) that should be built for this library +# * COMPILE_FLAGS ... +# Compilation options (for clang) +# * OPT_FLAGS ... +# Optimization options (for opt) +# * ALIASES ... 
+# List of aliases +function(add_libclc_builtin_set) + cmake_parse_arguments(ARG + "" + "ARCH;TRIPLE;ARCH_SUFFIX" + "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES" + ${ARGN} + ) + + if( NOT ARG_ARCH OR NOT ARG_ARCH_SUFFIX OR NOT ARG_TRIPLE ) + message( FATAL_ERROR "Must provide ARCH, ARCH_SUFFIX, and TRIPLE" ) + endif() + + set( bytecode_files "" ) + foreach( file IN LISTS ARG_GEN_FILES ARG_LIB_FILES ) + # We need to take each file and produce an absolute input file, as well + # as a unique architecture-specific output file. We deal with a mix of + # different input files, which makes this trickier. + if( ${file} IN_LIST ARG_GEN_FILES ) + # Generated files are given just as file names, which we must make + # absolute to the binary directory. + set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} ) + set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" ) + else() + # Other files are originally relative to each SOURCE file, which are + # then make relative to the libclc root directory. We must normalize + # the path (e.g., ironing out any ".."), then make it relative to the + # root directory again, and use that relative path component for the + # binary path. 
+ get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) + file( RELATIVE_PATH root_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${abs_path} ) + set( input_file ${CMAKE_CURRENT_SOURCE_DIR}/${file} ) + set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" ) + endif() + + get_filename_component( file_dir ${file} DIRECTORY ) + + compile_to_bc( + TRIPLE ${ARG_TRIPLE} + INPUT ${input_file} + OUTPUT ${output_file} + EXTRA_OPTS -fno-builtin -nostdlib + "${ARG_COMPILE_FLAGS}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir} + DEPENDENCIES generate_convert.cl clspv-generate_convert.cl + ) + list( APPEND bytecode_files ${output_file} ) + endforeach() + + set( builtins_comp_lib_tgt builtins.comp.${ARG_ARCH_SUFFIX} ) + add_custom_target( ${builtins_comp_lib_tgt} + DEPENDS ${bytecode_files} + ) + set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" ) + + set( builtins_link_lib_tgt builtins.link.${ARG_ARCH_SUFFIX} ) + link_bc( + TARGET ${builtins_link_lib_tgt} + INPUTS ${bytecode_files} + DEPENDENCIES ${builtins_comp_lib_tgt} + ) + + set( builtins_link_lib $ ) + + if( ARG_ARCH STREQUAL spirv OR ARG_ARCH STREQUAL spirv64 ) + set( spv_suffix ${ARG_ARCH_SUFFIX}.spv ) + add_custom_command( OUTPUT ${spv_suffix} + COMMAND ${llvm-spirv_exe} ${spvflags} -o ${spv_suffix} ${builtins_link_lib} + DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt} + ) + add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" ) + set_target_properties( "prepare-${spv_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" ) + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix} + DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + + return() + endif() + + set( builtins_opt_lib_tgt builtins.opt.${ARG_ARCH_SUFFIX} ) + + # Add opt target + add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc + COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc + ${builtins_link_lib} + DEPENDS ${opt_target} 
${builtins_link_lib} ${builtins_link_lib_tgt} + ) + add_custom_target( ${builtins_opt_lib_tgt} + ALL DEPENDS ${builtins_opt_lib_tgt}.bc + ) + set_target_properties( ${builtins_opt_lib_tgt} PROPERTIES + TARGET_FILE ${CMAKE_CURRENT_BINARY_DIR}/${builtins_opt_lib_tgt}.bc + FOLDER "libclc/Device IR/Opt" + ) + + set( builtins_opt_lib $ ) + + # Add prepare target + set( obj_suffix ${ARG_ARCH_SUFFIX}.bc ) + add_custom_command( OUTPUT ${obj_suffix} + COMMAND ${prepare_builtins_exe} -o ${obj_suffix} ${builtins_opt_lib} + DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} ) + add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} ) + set_target_properties( "prepare-${obj_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" ) + + # nvptx-- targets don't include workitem builtins + if( NOT ARG_TRIPLE MATCHES ".*ptx.*--$" ) + add_test( NAME external-calls-${obj_suffix} + COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) + endif() + + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + foreach( a ${ARG_ALIASES} ) + set( alias_suffix "${a}-${ARG_TRIPLE}.bc" ) + add_custom_target( ${alias_suffix} ALL + COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix} + DEPENDS prepare-${obj_suffix} ) + set_target_properties( "${alias_suffix}" PROPERTIES FOLDER "libclc/Device IR/Aliases" ) + install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) + endforeach( a ) +endfunction(add_libclc_builtin_set) + +# Produces a list of libclc source files by walking over SOURCES files in a +# given directory. Outputs the list of files in LIB_FILE_LIST. +# +# LIB_FILE_LIST may be pre-populated and is appended to. +# +# Arguments: +# * LIB_ROOT_DIR +# Root directory containing target's lib files, relative to libclc root +# directory. 
If not provided, is set to '.'. +# * DIRS ... +# List of directories under LIB_ROOT_DIR to walk over searching for SOURCES +# files +function(libclc_configure_lib_source LIB_FILE_LIST) + cmake_parse_arguments(ARG + "" + "LIB_ROOT_DIR" + "DIRS" + ${ARGN} + ) + + if( NOT ARG_LIB_ROOT_DIR ) + set(ARG_LIB_ROOT_DIR ".") + endif() + + # Enumerate SOURCES* files + set( source_list ) + foreach( l ${ARG_DIRS} ) + foreach( s "SOURCES" "SOURCES_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}" ) + file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/lib/${s} file_loc ) + file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc ) + # Prepend the location to give higher priority to + # specialized implementation + if( EXISTS ${loc} ) + set( source_list ${file_loc} ${source_list} ) + endif() + endforeach() + endforeach() + + ## Add the generated convert files here to prevent adding the ones listed in + ## SOURCES + set( rel_files ${${LIB_FILE_LIST}} ) # Source directory input files, relative to the root dir + set( objects ${${LIB_FILE_LIST}} ) # A "set" of already-added input files + + foreach( l ${source_list} ) + file( READ ${l} file_list ) + string( REPLACE "\n" ";" file_list ${file_list} ) + get_filename_component( dir ${l} DIRECTORY ) + foreach( f ${file_list} ) + # Only add each file once, so that targets can 'specialize' builtins + if( NOT ${f} IN_LIST objects ) + list( APPEND objects ${f} ) + list( APPEND rel_files ${dir}/${f} ) + endif() + endforeach() + endforeach() + + set( ${LIB_FILE_LIST} ${rel_files} PARENT_SCOPE ) +endfunction(libclc_configure_lib_source LIB_FILE_LIST) From b2bdd8bd39e90bfe3c66f6d5600468570a77ede6 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 24 Sep 2024 17:34:50 +0100 Subject: [PATCH 283/425] [libclc] Create an internal 'clc' builtins library Some libclc builtins currently use internal builtins prefixed with '__clc_' for various reasons, e.g., to avoid naming clashes. 
This commit formalizes this concept by starting to isolate the definitions of these internal clc builtins into a separate self-contained bytecode library, which is linked into each target's libclc OpenCL builtins before optimization takes place. The goal of this step is to allow additional libraries of builtins that provide entry points (or bindings) that are not written in OpenCL C but still wish to expose OpenCL-compatible builtins. By moving the implementations into a separate self-contained library, entry points can share as much code as possible without going through OpenCL C. The overall structure of the internal clc library is similar to the current OpenCL structure, with SOURCES files and targets being able to override the definitions of builtins as needed. The idea is that the OpenCL builtins will begin to need fewer target-specific overrides, as those will slowly move over to the clc builtins instead. Another advantage of having a separate bytecode library with the CLC implementations is that we can internalize the symbols when linking it (separately), whereas currently the CLC symbols make it into the final builtins library (and perhaps even the final compiled binary). This patch starts off with 'dot' as it's relatively self-contained, as opposed to most of the maths builtins which tend to pull in other builtins. We can also start to clang-format the builtins as we go, which should help to modernize the codebase.
--- libclc/CMakeLists.txt | 35 +++++++++- libclc/{generic => clc}/include/clc/clcfunc.h | 5 ++ .../{generic => clc}/include/clc/clctypes.h | 11 ++- libclc/clc/include/clc/geometric/clc_dot.h | 2 + libclc/clc/include/clc/geometric/clc_dot.inc | 1 + libclc/clc/include/clc/internal/clc.h | 26 +++++++ libclc/clc/lib/clspv/SOURCES | 1 + libclc/clc/lib/clspv/dummy.cl | 1 + libclc/clc/lib/clspv64 | 1 + libclc/clc/lib/generic/SOURCES | 1 + libclc/clc/lib/generic/geometric/clc_dot.cl | 57 +++++++++++++++ libclc/clc/lib/spirv/SOURCES | 2 + libclc/clc/lib/spirv64/SOURCES | 1 + libclc/cmake/modules/AddLibclc.cmake | 69 +++++++++++++++---- libclc/generic/lib/geometric/dot.cl | 27 ++++---- 15 files changed, 209 insertions(+), 31 deletions(-) rename libclc/{generic => clc}/include/clc/clcfunc.h (85%) rename libclc/{generic => clc}/include/clc/clctypes.h (94%) create mode 100644 libclc/clc/include/clc/geometric/clc_dot.h create mode 100644 libclc/clc/include/clc/geometric/clc_dot.inc create mode 100644 libclc/clc/include/clc/internal/clc.h create mode 100644 libclc/clc/lib/clspv/SOURCES create mode 100644 libclc/clc/lib/clspv/dummy.cl create mode 120000 libclc/clc/lib/clspv64 create mode 100644 libclc/clc/lib/generic/SOURCES create mode 100644 libclc/clc/lib/generic/geometric/clc_dot.cl create mode 100644 libclc/clc/lib/spirv/SOURCES create mode 100644 libclc/clc/lib/spirv64/SOURCES diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index ba04c0bc8618f27..16d74e53295cc12 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -29,7 +29,13 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ptx-nvidiacl/lib/SOURCES; r600/lib/SOURCES; spirv/lib/SOURCES; - spirv64/lib/SOURCES + spirv64/lib/SOURCES; + # CLC internal libraries + clc/lib/generic/SOURCES; + clc/lib/clspv/SOURCES; + clc/lib/clspv64/SOURCES; + clc/lib/spirv/SOURCES; + clc/lib/spirv64/SOURCES; ) set( LIBCLC_MIN_LLVM 3.9.0 ) @@ -278,6 +284,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( 
DARCH ${ARCH} ) endif() + set( clc_lib_files ) + libclc_configure_lib_source( + clc_lib_files + CLC_INTERNAL + LIB_ROOT_DIR clc + DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} + ) + set( opencl_lib_files ) set( opencl_gen_files ) @@ -326,7 +340,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) list( APPEND build_flags -D__CLC_INTERNAL -D${CLC_TARGET_DEFINE} - -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include + # All libclc builtin libraries see CLC headers + -I${CMAKE_CURRENT_SOURCE_DIR}/clc/include # FIXME: Fix libclc to not require disabling this noisy warning -Wno-bitwise-conditional-parentheses ) @@ -335,6 +350,20 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) list( APPEND build_flags -mcpu=${cpu} ) endif() + add_libclc_builtin_set( + CLC_INTERNAL + ARCH ${ARCH} + ARCH_SUFFIX clc-${arch_suffix} + TRIPLE ${clang_triple} + COMPILE_FLAGS ${build_flags} + OPT_FLAGS ${opt_flags} + LIB_FILES ${clc_lib_files} + ) + + list( APPEND build_flags + -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include + ) + add_libclc_builtin_set( ARCH ${ARCH} ARCH_SUFFIX ${arch_suffix} @@ -344,6 +373,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) LIB_FILES ${opencl_lib_files} GEN_FILES ${opencl_gen_files} ALIASES ${${d}_aliases} + # Link in the CLC builtins and internalize their symbols + INTERNAL_LINK_DEPENDENCIES $ ) endforeach( d ) endforeach( t ) diff --git a/libclc/generic/include/clc/clcfunc.h b/libclc/clc/include/clc/clcfunc.h similarity index 85% rename from libclc/generic/include/clc/clcfunc.h rename to libclc/clc/include/clc/clcfunc.h index 086d780b970859d..fe3406f64fecb8d 100644 --- a/libclc/generic/include/clc/clcfunc.h +++ b/libclc/clc/include/clc/clcfunc.h @@ -1,3 +1,6 @@ +#ifndef __CLC_CLCFUNC_H_ +#define __CLC_CLCFUNC_H_ + #define _CLC_OVERLOAD __attribute__((overloadable)) #define _CLC_DECL #define _CLC_INLINE __attribute__((always_inline)) inline @@ -11,3 +14,5 @@ #else #define _CLC_DEF __attribute__((always_inline)) #endif + +#endif // __CLC_CLCFUNC_H_ diff --git 
a/libclc/generic/include/clc/clctypes.h b/libclc/clc/include/clc/clctypes.h similarity index 94% rename from libclc/generic/include/clc/clctypes.h rename to libclc/clc/include/clc/clctypes.h index 76b816d395c2888..8ededd967e00339 100644 --- a/libclc/generic/include/clc/clctypes.h +++ b/libclc/clc/include/clc/clctypes.h @@ -1,3 +1,6 @@ +#ifndef __CLC_CLCTYPES_H_ +#define __CLC_CLCTYPES_H_ + /* 6.1.1 Built-in Scalar Data Types */ typedef unsigned char uchar; @@ -8,12 +11,12 @@ typedef unsigned long ulong; typedef __SIZE_TYPE__ size_t; typedef __PTRDIFF_TYPE__ ptrdiff_t; -#define __stdint_join3(a,b,c) a ## b ## c +#define __stdint_join3(a, b, c) a##b##c -#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__) +#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__) #define __uintn_t(n) __stdint_join3(unsigned __INT, n, _TYPE__) -typedef __intn_t(__INTPTR_WIDTH__) intptr_t; +typedef __intn_t(__INTPTR_WIDTH__) intptr_t; typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t; #undef __uintn_t @@ -93,3 +96,5 @@ typedef __attribute__((ext_vector_type(4))) half half4; typedef __attribute__((ext_vector_type(8))) half half8; typedef __attribute__((ext_vector_type(16))) half half16; #endif + +#endif // __CLC_CLCTYPES_H_ diff --git a/libclc/clc/include/clc/geometric/clc_dot.h b/libclc/clc/include/clc/geometric/clc_dot.h new file mode 100644 index 000000000000000..e0e47ab2093efdb --- /dev/null +++ b/libclc/clc/include/clc/geometric/clc_dot.h @@ -0,0 +1,2 @@ +#define __CLC_BODY +#include diff --git a/libclc/clc/include/clc/geometric/clc_dot.inc b/libclc/clc/include/clc/geometric/clc_dot.inc new file mode 100644 index 000000000000000..016b564df362d20 --- /dev/null +++ b/libclc/clc/include/clc/geometric/clc_dot.inc @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT __clc_dot(__CLC_FLOATN p0, __CLC_FLOATN p1); diff --git a/libclc/clc/include/clc/internal/clc.h b/libclc/clc/include/clc/internal/clc.h new file mode 100644 index 000000000000000..c3bdfd754105f74 --- /dev/null +++ 
b/libclc/clc/include/clc/internal/clc.h @@ -0,0 +1,26 @@ +#ifndef __CLC_INTERNAL_CLC_H_ +#define __CLC_INTERNAL_CLC_H_ + +#ifndef cl_clang_storage_class_specifiers +#error Implementation requires cl_clang_storage_class_specifiers extension! +#endif + +#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#endif + +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#endif + +/* Function Attributes */ +#include + +/* 6.1 Supported Data Types */ +#include + +#pragma OPENCL EXTENSION all : disable + +#endif // __CLC_INTERNAL_CLC_H_ diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES new file mode 100644 index 000000000000000..75a3130357c3456 --- /dev/null +++ b/libclc/clc/lib/clspv/SOURCES @@ -0,0 +1 @@ +dummy.cl diff --git a/libclc/clc/lib/clspv/dummy.cl b/libclc/clc/lib/clspv/dummy.cl new file mode 100644 index 000000000000000..fab17ac780e3751 --- /dev/null +++ b/libclc/clc/lib/clspv/dummy.cl @@ -0,0 +1 @@ +// Empty file diff --git a/libclc/clc/lib/clspv64 b/libclc/clc/lib/clspv64 new file mode 120000 index 000000000000000..ea01ba94bc63684 --- /dev/null +++ b/libclc/clc/lib/clspv64 @@ -0,0 +1 @@ +clspv \ No newline at end of file diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES new file mode 100644 index 000000000000000..fa2e4f50b99cd77 --- /dev/null +++ b/libclc/clc/lib/generic/SOURCES @@ -0,0 +1 @@ +geometric/clc_dot.cl diff --git a/libclc/clc/lib/generic/geometric/clc_dot.cl b/libclc/clc/lib/generic/geometric/clc_dot.cl new file mode 100644 index 000000000000000..bf0f19b51bc05ea --- /dev/null +++ b/libclc/clc/lib/generic/geometric/clc_dot.cl @@ -0,0 +1,57 @@ +#include + +_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; } + +_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) { + return p0.x * p1.x + p0.y * p1.y; +} + +_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) { + 
return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; +} + +_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; +} + +#ifdef cl_khr_fp64 + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) { + return p0 * p1; +} + +_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) { + return p0.x * p1.x + p0.y * p1.y; +} + +_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; +} + +_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; +} + +#endif + +#ifdef cl_khr_fp16 + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; } + +_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) { + return p0.x * p1.x + p0.y * p1.y; +} + +_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z; +} + +_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) { + return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w; +} + +#endif diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES new file mode 100644 index 000000000000000..d8effd19613c8b5 --- /dev/null +++ b/libclc/clc/lib/spirv/SOURCES @@ -0,0 +1,2 @@ +../generic/geometric/clc_dot.cl + diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES new file mode 100644 index 000000000000000..9200810ace38e7c --- /dev/null +++ b/libclc/clc/lib/spirv64/SOURCES @@ -0,0 +1 @@ +../generic/geometric/clc_dot.cl diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 147f06bc9a9afc7..ee7c8500c8359fb 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -76,6 +76,8 @@ endfunction() # Links together one or more bytecode files # # 
Arguments: +# * INTERNALIZE +# Set if -internalize flag should be passed when linking # * TARGET # Custom target to create # * INPUT ... @@ -84,7 +86,7 @@ endfunction() # List of extra dependencies to inject function(link_bc) cmake_parse_arguments(ARG - "" + "INTERNALIZE" "TARGET" "INPUTS;DEPENDENCIES" ${ARGN} @@ -97,7 +99,7 @@ function(link_bc) file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE ) # Turn it into a space-separate list of input files list( JOIN ARG_INPUTS " " RSP_INPUT ) - file( WRITE ${RSP_FILE} ${RSP_INPUT} ) + file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} ) # Ensure that if this file is removed, we re-run CMake set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${RSP_FILE} @@ -107,7 +109,7 @@ function(link_bc) add_custom_command( OUTPUT ${ARG_TARGET}.bc - COMMAND ${llvm-link_exe} -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG} + COMMAND ${llvm-link_exe} $<$:--internalize> -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG} DEPENDS ${llvm-link_target} ${ARG_DEPENDENCIES} ${ARG_INPUTS} ${RSP_FILE} ) @@ -195,6 +197,9 @@ endfunction() # Triple used to compile # # Optional Arguments: +# * CLC_INTERNAL +# Pass if compiling the internal CLC builtin libraries, which are not +# optimized and do not have aliases created. # * LIB_FILES ... # List of files that should be built for this library # * GEN_FILES ... @@ -205,11 +210,14 @@ endfunction() # Optimization options (for opt) # * ALIASES ... # List of aliases +# * INTERNAL_LINK_DEPENDENCIES ... +# A list of extra bytecode files to link into the builtin library. Symbols +# from these link dependencies will be internalized during linking. 
function(add_libclc_builtin_set) cmake_parse_arguments(ARG - "" + "CLC_INTERNAL" "ARCH;TRIPLE;ARCH_SUFFIX" - "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES" + "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES;INTERNAL_LINK_DEPENDENCIES" ${ARGN} ) @@ -258,12 +266,42 @@ function(add_libclc_builtin_set) ) set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" ) + if( NOT bytecode_files ) + message(FATAL_ERROR "Cannot create an empty builtins library") + endif() + set( builtins_link_lib_tgt builtins.link.${ARG_ARCH_SUFFIX} ) - link_bc( - TARGET ${builtins_link_lib_tgt} - INPUTS ${bytecode_files} - DEPENDENCIES ${builtins_comp_lib_tgt} - ) + + if( NOT ARG_INTERNAL_LINK_DEPENDENCIES ) + link_bc( + TARGET ${builtins_link_lib_tgt} + INPUTS ${bytecode_files} + DEPENDENCIES ${builtins_comp_lib_tgt} + ) + else() + # If we have libraries to link while internalizing their symbols, we need + # two separate link steps; the --internalize flag applies to all link + # inputs but the first. + set( builtins_link_lib_tmp_tgt builtins.link.pre-deps.${ARG_ARCH_SUFFIX} ) + link_bc( + TARGET ${builtins_link_lib_tmp_tgt} + INPUTS ${bytecode_files} + DEPENDENCIES ${builtins_comp_lib_tgt} + ) + link_bc( + INTERNALIZE + TARGET ${builtins_link_lib_tgt} + INPUTS $ + ${ARG_INTERNAL_LINK_DEPENDENCIES} + DEPENDENCIES ${builtins_link_lib_tmp_tgt} + ) + endif() + + # For the CLC internal builtins, exit here - we only optimize the targets' + # entry points once we've linked the CLC buitins into them + if( ARG_CLC_INTERNAL ) + return() + endif() set( builtins_link_lib $ ) @@ -331,6 +369,9 @@ endfunction(add_libclc_builtin_set) # LIB_FILE_LIST may be pre-populated and is appended to. # # Arguments: +# * CLC_INTERNAL +# Pass if compiling the internal CLC builtin libraries, which have a +# different directory structure. # * LIB_ROOT_DIR # Root directory containing target's lib files, relative to libclc root # directory. If not provided, is set to '.'. 
@@ -339,7 +380,7 @@ endfunction(add_libclc_builtin_set) # files function(libclc_configure_lib_source LIB_FILE_LIST) cmake_parse_arguments(ARG - "" + "CLC_INTERNAL" "LIB_ROOT_DIR" "DIRS" ${ARGN} @@ -353,7 +394,11 @@ function(libclc_configure_lib_source LIB_FILE_LIST) set( source_list ) foreach( l ${ARG_DIRS} ) foreach( s "SOURCES" "SOURCES_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}" ) - file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/lib/${s} file_loc ) + if( ARG_CLC_INTERNAL ) + file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/lib/${l}/${s} file_loc ) + else() + file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/lib/${s} file_loc ) + endif() file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc ) # Prepend the location to give higher priority to # specialized implementation diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl index e58bc26f4333a7e..e790d02636563cd 100644 --- a/libclc/generic/lib/geometric/dot.cl +++ b/libclc/generic/lib/geometric/dot.cl @@ -1,19 +1,20 @@ #include +#include _CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) { - return p0*p1; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) { - return p0.x*p1.x + p0.y*p1.y; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; + return __clc_dot(p0, p1); } #ifdef cl_khr_fp64 @@ -21,19 +22,19 @@ _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) { #pragma OPENCL EXTENSION cl_khr_fp64 : enable _CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) { - return p0*p1; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) { - return p0.x*p1.x + p0.y*p1.y; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; + 
return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; + return __clc_dot(p0, p1); } #endif @@ -42,20 +43,18 @@ _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) { #pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { - return p0*p1; -} +_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) { - return p0.x*p1.x + p0.y*p1.y; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z; + return __clc_dot(p0, p1); } _CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) { - return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w; + return __clc_dot(p0, p1); } #endif From 667deb640870cbdaac941f30d189ef3fe926141c Mon Sep 17 00:00:00 2001 From: Edd Dawson Date: Tue, 29 Oct 2024 13:21:48 +0000 Subject: [PATCH 284/425] [PS4/PS5][Driver] Apply clang-format to PS4CPU.cpp (NFC) (#114038) --- clang/lib/Driver/ToolChains/PS4CPU.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 719bba41436a57b..9daafbe703f68eb 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -430,8 +430,7 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple, } void toolchains::PS4PS5Base::AddClangSystemIncludeArgs( - const ArgList &DriverArgs, - ArgStringList &CC1Args) const { + const ArgList &DriverArgs, ArgStringList &CC1Args) const { const Driver &D = getDriver(); if (DriverArgs.hasArg(options::OPT_nostdinc)) From d732c0b13c55259177f2936516b6087d634078e0 Mon Sep 17 00:00:00 2001 From: neildhickey Date: Tue, 29 Oct 2024 13:34:43 +0000 Subject: [PATCH 285/425] [clang][AArch64] Add getHostCPUFeatures to query for enabled features in cpu info (#97749) 
Add getHostCPUFeatures into the AArch64 Target Parser to query the cpuinfo for the device in the case where we are compiling with -mcpu=native. Add LLVM_CPUINFO environment variable to test mock /proc/cpuinfo files for -mcpu=native Co-authored-by: Elvina Yakubova --- clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 20 ++- clang/test/Driver/Inputs/cpunative/cortex-a57 | 8 + clang/test/Driver/Inputs/cpunative/cortex-a72 | 8 + clang/test/Driver/Inputs/cpunative/cortex-a76 | 8 + .../test/Driver/Inputs/cpunative/neoverse-n1 | 8 + .../test/Driver/Inputs/cpunative/neoverse-v2 | 8 + clang/test/Driver/aarch64-mcpu-native.c | 138 ++++++++++++++++++ llvm/lib/TargetParser/Host.cpp | 10 +- 8 files changed, 198 insertions(+), 10 deletions(-) create mode 100644 clang/test/Driver/Inputs/cpunative/cortex-a57 create mode 100644 clang/test/Driver/Inputs/cpunative/cortex-a72 create mode 100644 clang/test/Driver/Inputs/cpunative/cortex-a76 create mode 100644 clang/test/Driver/Inputs/cpunative/neoverse-n1 create mode 100644 clang/test/Driver/Inputs/cpunative/neoverse-v2 create mode 100644 clang/test/Driver/aarch64-mcpu-native.c diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index f083e40df131449..1e2ac4e501bafd1 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -135,15 +135,21 @@ getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March, return true; } -static bool -getAArch64ArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, - const ArgList &Args, - llvm::AArch64::ExtensionSet &Extensions) { +static bool getAArch64ArchFeaturesFromMcpu( + const Driver &D, StringRef Mcpu, const ArgList &Args, + llvm::AArch64::ExtensionSet &Extensions, std::vector &Features) { StringRef CPU; std::string McpuLowerCase = Mcpu.lower(); if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, Extensions)) return false; + if (Mcpu == "native") { + llvm::StringMap HostFeatures = 
llvm::sys::getHostCPUFeatures(); + for (auto &[Feature, Enabled] : HostFeatures) { + Features.push_back(Args.MakeArgString((Enabled ? "+" : "-") + Feature)); + } + } + return true; } @@ -210,11 +216,11 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Extensions); else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) - success = - getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Extensions); + success = getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Extensions, + Features); else if (isCPUDeterminedByTriple(Triple)) success = getAArch64ArchFeaturesFromMcpu( - D, getAArch64TargetCPU(Args, Triple, A), Args, Extensions); + D, getAArch64TargetCPU(Args, Triple, A), Args, Extensions, Features); else // Default to 'A' profile if the architecture is not specified. success = getAArch64ArchFeaturesFromMarch(D, "armv8-a", Args, Extensions); diff --git a/clang/test/Driver/Inputs/cpunative/cortex-a57 b/clang/test/Driver/Inputs/cpunative/cortex-a57 new file mode 100644 index 000000000000000..e1903012ab79ccd --- /dev/null +++ b/clang/test/Driver/Inputs/cpunative/cortex-a57 @@ -0,0 +1,8 @@ +processor : 0 +BogoMIPS : 200.00 +Features : fp asimd evtstrm crc32 cpuid +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd07 +CPU revision : 1 diff --git a/clang/test/Driver/Inputs/cpunative/cortex-a72 b/clang/test/Driver/Inputs/cpunative/cortex-a72 new file mode 100644 index 000000000000000..7aed4a6fa732369 --- /dev/null +++ b/clang/test/Driver/Inputs/cpunative/cortex-a72 @@ -0,0 +1,8 @@ +processor : 0 +BogoMIPS : 250.00 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid asimdrdm +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x0 +CPU part : 0xd08 +CPU revision : 2 diff --git a/clang/test/Driver/Inputs/cpunative/cortex-a76 b/clang/test/Driver/Inputs/cpunative/cortex-a76 new file mode 100644 index 000000000000000..21822cfcec60b0e --- /dev/null +++ 
b/clang/test/Driver/Inputs/cpunative/cortex-a76 @@ -0,0 +1,8 @@ +processor : 0 +BogoMIPS : 500.00 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm ssbs jscvt fcma +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd0b +CPU revision : 2 diff --git a/clang/test/Driver/Inputs/cpunative/neoverse-n1 b/clang/test/Driver/Inputs/cpunative/neoverse-n1 new file mode 100644 index 000000000000000..571e8840b09f08a --- /dev/null +++ b/clang/test/Driver/Inputs/cpunative/neoverse-n1 @@ -0,0 +1,8 @@ +processor : 0 +BogoMIPS : 50.00 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop asimddp ssbs +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x3 +CPU part : 0xd0c +CPU revision : 1 diff --git a/clang/test/Driver/Inputs/cpunative/neoverse-v2 b/clang/test/Driver/Inputs/cpunative/neoverse-v2 new file mode 100644 index 000000000000000..c3c8433415d7a08 --- /dev/null +++ b/clang/test/Driver/Inputs/cpunative/neoverse-v2 @@ -0,0 +1,8 @@ +processor : 0 +BogoMIPS : 2000.00 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bf16 dgh bti +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x0 +CPU part : 0xd4f +CPU revision : 0 diff --git a/clang/test/Driver/aarch64-mcpu-native.c b/clang/test/Driver/aarch64-mcpu-native.c new file mode 100644 index 000000000000000..f1d0ba76ad79c49 --- /dev/null +++ b/clang/test/Driver/aarch64-mcpu-native.c @@ -0,0 +1,138 @@ +// REQUIRES: aarch64-registered-target +// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/neoverse-v2 +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-NV2 --implicit-check-not=FEAT_ %s + +// CHECK-FEAT-NV2: 
Extensions enabled for the given AArch64 target +// CHECK-FEAT-NV2-EMPTY: +// CHECK-FEAT-NV2: Architecture Feature(s) Description +// CHECK-FEAT-NV2: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-FEAT-NV2: FEAT_AMUv1 Enable Armv8.4-A Activity Monitors extension +// CHECK-FEAT-NV2: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-FEAT-NV2: FEAT_BF16 Enable BFloat16 Extension +// CHECK-FEAT-NV2: FEAT_BTI Enable Branch Target Identification +// CHECK-FEAT-NV2: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets +// CHECK-FEAT-NV2: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions +// CHECK-FEAT-NV2: FEAT_CSV2_2 Enable architectural speculation restriction +// CHECK-FEAT-NV2: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions +// CHECK-FEAT-NV2: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence +// CHECK-FEAT-NV2: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence +// CHECK-FEAT-NV2: FEAT_DotProd Enable dot product support +// CHECK-FEAT-NV2: FEAT_ETE Enable Embedded Trace Extension +// CHECK-FEAT-NV2: FEAT_FCMA Enable Armv8.3-A Floating-point complex number support +// CHECK-FEAT-NV2: FEAT_FHM Enable FP16 FML instructions +// CHECK-FEAT-NV2: FEAT_FP Enable Armv8.0-A Floating Point Extensions +// CHECK-FEAT-NV2: FEAT_FP16 Enable half-precision floating-point data processing +// CHECK-FEAT-NV2: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int +// CHECK-FEAT-NV2: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions +// CHECK-FEAT-NV2: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons +// CHECK-FEAT-NV2: FEAT_I8MM Enable Matrix Multiply Int8 Extension +// CHECK-FEAT-NV2: FEAT_JSCVT Enable Armv8.3-A JavaScript FP conversion instructions +// CHECK-FEAT-NV2: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension +// CHECK-FEAT-NV2: FEAT_LRCPC Enable support 
for RCPC extension +// CHECK-FEAT-NV2: FEAT_LRCPC2 Enable Armv8.4-A RCPC instructions with Immediate Offsets +// CHECK-FEAT-NV2: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions +// CHECK-FEAT-NV2: FEAT_LSE2 Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules +// CHECK-FEAT-NV2: FEAT_MPAM Enable Armv8.4-A Memory system Partitioning and Monitoring extension +// CHECK-FEAT-NV2: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension +// CHECK-FEAT-NV2: FEAT_NV, FEAT_NV2 Enable Armv8.4-A Nested Virtualization Enchancement +// CHECK-FEAT-NV2: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension +// CHECK-FEAT-NV2: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants +// CHECK-FEAT-NV2: FEAT_PAuth Enable Armv8.3-A Pointer Authentication extension +// CHECK-FEAT-NV2: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension +// CHECK-FEAT-NV2: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions +// CHECK-FEAT-NV2: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions +// CHECK-FEAT-NV2: FEAT_RNG Enable Random Number generation instructions +// CHECK-FEAT-NV2: FEAT_SB Enable Armv8.5-A Speculation Barrier +// CHECK-FEAT-NV2: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension +// CHECK-FEAT-NV2: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-FEAT-NV2: FEAT_SPE Enable Statistical Profiling extension +// CHECK-FEAT-NV2: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions +// CHECK-FEAT-NV2: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-FEAT-NV2: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions +// CHECK-FEAT-NV2: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions +// CHECK-FEAT-NV2: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions +// CHECK-FEAT-NV2: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions +// 
CHECK-FEAT-NV2: FEAT_TRBE Enable Trace Buffer Extension +// CHECK-FEAT-NV2: FEAT_TRF Enable Armv8.4-A Trace extension +// CHECK-FEAT-NV2: FEAT_UAO Enable Armv8.2-A UAO PState +// CHECK-FEAT-NV2: FEAT_VHE Enable Armv8.1-A Virtual Host extension + +// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/neoverse-n1 +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-NN1 --implicit-check-not=FEAT_ %s + +// CHECK-FEAT-NN1: Extensions enabled for the given AArch64 target +// CHECK-FEAT-NN1-EMPTY: +// CHECK-FEAT-NN1: Architecture Feature(s) Description +// CHECK-FEAT-NN1: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-FEAT-NN1: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-FEAT-NN1: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions +// CHECK-FEAT-NN1: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence +// CHECK-FEAT-NN1: FEAT_DotProd Enable dot product support +// CHECK-FEAT-NN1: FEAT_FP Enable Armv8.0-A Floating Point Extensions +// CHECK-FEAT-NN1: FEAT_FP16 Enable half-precision floating-point data processing +// CHECK-FEAT-NN1: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension +// CHECK-FEAT-NN1: FEAT_LRCPC Enable support for RCPC extension +// CHECK-FEAT-NN1: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions +// CHECK-FEAT-NN1: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension +// CHECK-FEAT-NN1: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants +// CHECK-FEAT-NN1: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension +// CHECK-FEAT-NN1: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions +// CHECK-FEAT-NN1: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions +// CHECK-FEAT-NN1: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-FEAT-NN1: FEAT_SPE Enable Statistical Profiling extension +// CHECK-FEAT-NN1: FEAT_SSBS, 
FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-FEAT-NN1: FEAT_UAO Enable Armv8.2-A UAO PState +// CHECK-FEAT-NN1: FEAT_VHE Enable Armv8.1-A Virtual Host extension + + +// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/cortex-a57 +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-CA57 --implicit-check-not=FEAT_ %s + +// CHECK-FEAT-CA57: Extensions enabled for the given AArch64 target +// CHECK-FEAT-CA57-EMPTY: +// CHECK-FEAT-CA57: Architecture Feature(s) Description +// CHECK-FEAT-CA57: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-FEAT-CA57: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-FEAT-CA57: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions +// CHECK-FEAT-CA57: FEAT_FP Enable Armv8.0-A Floating Point Extensions +// CHECK-FEAT-CA57: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension +// CHECK-FEAT-CA57: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support + +// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/cortex-a72 +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-CA72 --implicit-check-not=FEAT_ %s + +// CHECK-FEAT-CA72: Extensions enabled for the given AArch64 target +// CHECK-EMPTY: +// CHECK-FEAT-CA72: Architecture Feature(s) Description +// CHECK-FEAT-CA72: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-FEAT-CA72: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-FEAT-CA72: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions +// CHECK-FEAT-CA72: FEAT_FP Enable Armv8.0-A Floating Point Extensions +// CHECK-FEAT-CA72: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension +// CHECK-FEAT-CA72: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support + +// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/cortex-a76 +// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace 
--check-prefix=CHECK-FEAT-CA76 --implicit-check-not=FEAT_ %s + +// CHECK-FEAT-CA76: Extensions enabled for the given AArch64 target +// CHECK-FEAT-CA76-EMPTY: +// CHECK-FEAT-CA76: Architecture Feature(s) Description +// CHECK-FEAT-CA76: FEAT_AES, FEAT_PMULL Enable AES support +// CHECK-FEAT-CA76: FEAT_AdvSIMD Enable Advanced SIMD instructions +// CHECK-FEAT-CA76: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions +// CHECK-FEAT-CA76: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence +// CHECK-FEAT-CA76: FEAT_DotProd Enable dot product support +// CHECK-FEAT-CA76: FEAT_FP Enable Armv8.0-A Floating Point Extensions +// CHECK-FEAT-CA76: FEAT_FP16 Enable half-precision floating-point data processing +// CHECK-FEAT-CA76: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension +// CHECK-FEAT-CA76: FEAT_LRCPC Enable support for RCPC extension +// CHECK-FEAT-CA76: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions +// CHECK-FEAT-CA76: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension +// CHECK-FEAT-CA76: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants +// CHECK-FEAT-CA76: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension +// CHECK-FEAT-CA76: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions +// CHECK-FEAT-CA76: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions +// CHECK-FEAT-CA76: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support +// CHECK-FEAT-CA76: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit +// CHECK-FEAT-CA76: FEAT_UAO Enable Armv8.2-A UAO PState +// CHECK-FEAT-CA76: FEAT_VHE Enable Armv8.1-A Virtual Host extension diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 5c4e3a9dc52b0f9..de6c4edebba39a6 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -68,11 +68,15 @@ using namespace llvm; static std::unique_ptr 
LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { + const char *CPUInfoFile = "/proc/cpuinfo"; + if (const char *CpuinfoIntercept = std::getenv("LLVM_CPUINFO")) + CPUInfoFile = CpuinfoIntercept; llvm::ErrorOr> Text = - llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); + llvm::MemoryBuffer::getFileAsStream(CPUInfoFile); + if (std::error_code EC = Text.getError()) { - llvm::errs() << "Can't read " - << "/proc/cpuinfo: " << EC.message() << "\n"; + llvm::errs() << "Can't read " << CPUInfoFile << ": " << EC.message() + << "\n"; return nullptr; } return std::move(*Text); From 4b44639a4320f980b3c9fa3b96e911e0741f179c Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 29 Oct 2024 09:38:48 -0400 Subject: [PATCH 286/425] Nominate Erich Keane for OpenACC (#114041) Erich is the driving force behind the OpenACC implementation work that has recently begun in Clang. Given his expertise on the topic and that he's already aware of maintainer expectations (he maintains templates and attributes currently), we should recognize that he's also the one maintaining OpenACC. --- clang/Maintainers.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 6635727e9eb65c6..54690452681a608 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -250,6 +250,12 @@ OpenCL conformance | anastasia\@compiler-experts.com (email), Anastasia (Phabricator), AnastasiaStulova (GitHub) +OpenACC +~~~~~~~ +| Erich Keane +| ekeane\@nvidia.com (email), ErichKeane (Phabricator), erichkeane (GitHub) + + SYCL conformance ~~~~~~~~~~~~~~~~ | Alexey Bader From ec871cfcdf3a46ca1be9842035e4271524644ed1 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 29 Oct 2024 15:00:36 +0100 Subject: [PATCH 287/425] [clang][bytecode][NFC] Remove Pointer::elem() (#114046) Unused. 
--- clang/lib/AST/ByteCode/Pointer.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h index 72e255dba13f6ba..457fe93b2781757 100644 --- a/clang/lib/AST/ByteCode/Pointer.h +++ b/clang/lib/AST/ByteCode/Pointer.h @@ -653,15 +653,6 @@ class Pointer { return *reinterpret_cast(asBlockPointer().Pointee->rawData() + Offset); } - /// Dereferences a primitive element. - template T &elem(unsigned I) const { - assert(I < getNumElems()); - assert(isBlockPointer()); - assert(asBlockPointer().Pointee); - return reinterpret_cast(asBlockPointer().Pointee->data() + - sizeof(InitMapPtr))[I]; - } - /// Whether this block can be read from at all. This is only true for /// block pointers that point to a valid location inside that block. bool isDereferencable() const { From c370869cd6f66e3c2ab33528e44959279311e499 Mon Sep 17 00:00:00 2001 From: Piotr Fusik Date: Tue, 29 Oct 2024 15:01:37 +0100 Subject: [PATCH 288/425] [mlir][NFC] Avoid a warning (#114052) gcc 14.1 warning: template-id not allowed for destructor in C++20 [-Wtemplate-id-cdtor] --- mlir/lib/Pass/PassRegistry.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index fe842755958418e..029512fd3ecc118 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -422,7 +422,7 @@ llvm::cl::OptionValue::operator=( return *this; } -llvm::cl::OptionValue::~OptionValue() = default; +llvm::cl::OptionValue::~OptionValue() = default; void llvm::cl::OptionValue::setValue( const OpPassManager &newValue) { From f490697cb9ad3db101ced7f4844002ffa0c73da8 Mon Sep 17 00:00:00 2001 From: Boaz Brickner Date: Tue, 29 Oct 2024 15:08:24 +0100 Subject: [PATCH 289/425] [clang] [NFC] Fix a couple of typos: assuments and assingment --- clang/lib/Sema/CheckExprLifetime.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/CheckExprLifetime.cpp 
b/clang/lib/Sema/CheckExprLifetime.cpp index aa0a2e223e708f0..357082fe329350d 100644 --- a/clang/lib/Sema/CheckExprLifetime.cpp +++ b/clang/lib/Sema/CheckExprLifetime.cpp @@ -472,7 +472,7 @@ shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) { } // Return true if this is an "normal" assignment operator. -// We assuments that a normal assingment operator always returns *this, that is, +// We assume that a normal assignment operator always returns *this, that is, // an lvalue reference that is the same type as the implicit object parameter // (or the LHS for a non-member operator$=). static bool isNormalAssignmentOperator(const FunctionDecl *FD) { From 87b6ec3be6b80f8e35d2eaea468e6bca79e79c2e Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Tue, 29 Oct 2024 15:08:41 +0100 Subject: [PATCH 290/425] [clang][bytecode] Diagnose placement-new construction to inactive field (#114047) We can reuse CheckActive() for this. --- clang/lib/AST/ByteCode/Interp.cpp | 5 +++++ clang/test/AST/ByteCode/placement-new.cpp | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 6e45cfb7e8a20ce..513d4512b45cff4 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -1451,6 +1451,11 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, const Expr *E, << StorageType << AllocType; return false; } + + // Can't activate fields in a union, unless the direct base is the union. 
+ if (Ptr.inUnion() && !Ptr.isActive() && !Ptr.getBase().getRecord()->isUnion()) + return CheckActive(S, OpPC, Ptr, AK_Construct); + return true; } diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp index 5673b5cba3f700b..56f54ff168f3e85 100644 --- a/clang/test/AST/ByteCode/placement-new.cpp +++ b/clang/test/AST/ByteCode/placement-new.cpp @@ -14,7 +14,9 @@ namespace std { template constexpr void construct_at(void *p, Args &&...args) { new (p) T((Args&&)args...); // both-note {{in call to}} \ - // both-note {{placement new would change type of storage from 'int' to 'float'}} + // both-note {{placement new would change type of storage from 'int' to 'float'}} \ + // both-note {{construction of subobject of member 'x' of union with active member 'a' is not allowed in a constant expression}} + } } @@ -284,6 +286,18 @@ namespace ConstructAt { static_assert(bad_construct_at_type()); // both-error {{not an integral constant expression}} \ // both-note {{in call}} + constexpr bool bad_construct_at_subobject() { + struct X { int a, b; }; + union A { + int a; + X x; + }; + A a = {1}; + std::construct_at(&a.x.a, 1); // both-note {{in call}} + return true; + } + static_assert(bad_construct_at_subobject()); // both-error{{not an integral constant expression}} \ + // both-note {{in call}} } namespace UsedToCrash { From b9376915cf897e79a852497c60f18ddacb1830ae Mon Sep 17 00:00:00 2001 From: Boaz Brickner Date: Tue, 29 Oct 2024 15:09:23 +0100 Subject: [PATCH 291/425] [clang] [NFC] Fix assingments typo --- clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp b/clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp index bb9acf09d120be1..0a6462e4262413b 100644 --- a/clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp +++ b/clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp @@ -14,7 +14,7 @@ kernel void test() 
{ // address space variables. User defined initialization could // make sense, but would it mean that all work items need to // execute it? Potentially disallowing any initialization would - // make things easier and assingments can be used to set specific + // make things easier and assignments can be used to set specific // values. This rules should make it consistent with OpenCL C. //__local C c(); } From 340cd4e631d72d02cd79f9aad74d2a354abc977e Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Tue, 29 Oct 2024 14:12:49 +0000 Subject: [PATCH 292/425] [Bazel] fix for abc49cc19463970d5523d7d3332e4c1f83bc2ef7 --- .../llvm-project-overlay/libc/BUILD.bazel | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 774366d48a2161f..1956233590f728d 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -136,6 +136,17 @@ libc_support_library( hdrs = ["hdr/fenv_macros.h"], ) +libc_support_library( + name = "hdr_fcntl_macros", + hdrs = ["hdr/fcntl_macros.h"], + deps = [":hdr_fcntl_overlay"], +) + +libc_support_library( + name = "hdr_fcntl_overlay", + hdrs = ["hdr/fcntl_overlay.h"], +) + libc_support_library( name = "hdr_signal_macros", hdrs = ["hdr/signal_macros.h"], @@ -201,6 +212,11 @@ libc_support_library( hdrs = ["hdr/types/fexcept_t.h"], ) +libc_support_library( + name = "types_mode_t", + hdrs = ["hdr/types/mode_t.h"], +) + libc_support_library( name = "types_sigset_t", hdrs = ["hdr/types/sigset_t.h"], @@ -3488,6 +3504,8 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", + ":types_mode_t", ], ) @@ -3503,6 +3521,8 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", + ":types_mode_t", ], ) @@ -3514,6 +3534,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", 
":errno", + ":hdr_fcntl_macros", ], ) @@ -3527,6 +3548,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3571,6 +3593,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3710,6 +3733,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3721,6 +3745,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3800,6 +3825,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3811,6 +3837,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3822,6 +3849,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3833,6 +3861,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3844,6 +3873,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3891,6 +3921,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -3902,6 +3933,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ], ) @@ -4161,6 +4193,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ":hdr_stdio_overlay", ":types_FILE", ], @@ -4178,6 +4211,7 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", ":llvm_libc_macros_fcntl_macros", ], ) @@ -4192,6 +4226,8 @@ libc_function( ":__support_common", ":__support_osutil_syscall", ":errno", + ":hdr_fcntl_macros", + ":types_mode_t", ], ) From f257e9bdbbb790e4fe0a5e2538c92d7edd85a2e5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Oct 2024 12:44:32 
+0000 Subject: [PATCH 293/425] [clang][x86] Update AVX/AVX512 setzero constexpr tests to use the TEST_CONSTEXPR macro --- clang/test/CodeGen/X86/avx-builtins.c | 20 ++-------- clang/test/CodeGen/X86/avx512f-builtins.c | 29 +++----------- clang/test/CodeGen/X86/builtin_test_helpers.h | 38 +++++++++++++++---- 3 files changed, 41 insertions(+), 46 deletions(-) diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 9d6c1897f540d38..4e56204c8ad40fa 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -11,6 +11,7 @@ #include +#include "builtin_test_helpers.h" // NOTE: This should match the tests in llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -1740,18 +1741,21 @@ __m256d test_mm256_setzero_pd(void) { // CHECK: store <4 x double> zeroinitializer return _mm256_setzero_pd(); } +TEST_CONSTEXPR(match_m256d(_mm256_setzero_pd(), +0.0, +0.0, +0.0, +0.0)); __m256 test_mm256_setzero_ps(void) { // CHECK-LABEL: test_mm256_setzero_ps // CHECK: store <8 x float> zeroinitializer return _mm256_setzero_ps(); } +TEST_CONSTEXPR(match_m256(_mm256_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f)); __m256i test_mm256_setzero_si256(void) { // CHECK-LABEL: test_mm256_setzero_si256 // CHECK: store <4 x i64> zeroinitializer return _mm256_setzero_si256(); } +TEST_CONSTEXPR(match_m256i(_mm256_setzero_si256(), 0, 0, 0, 0)); __m256d test_mm256_shuffle_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_shuffle_pd @@ -2097,19 +2101,3 @@ float test_mm256_cvtss_f32(__m256 __a) // CHECK: extractelement <8 x float> %{{.*}}, i32 0 return _mm256_cvtss_f32(__a); } - -// Test constexpr handling. 
-#if defined(__cplusplus) && (__cplusplus >= 201103L) - -void test_constexpr() { - constexpr __m256d v_mm256_setzero_pd = _mm256_setzero_pd(); - static_assert(v_mm256_setzero_pd[0] == +0.0 && v_mm256_setzero_pd[1] == +0.0 && v_mm256_setzero_pd[2] == +0.0 && v_mm256_setzero_pd[3] == +0.0); - - constexpr __m256 v_mm256_setzero_ps = _mm256_setzero_ps(); - static_assert(v_mm256_setzero_ps[0] == +0.0f && v_mm256_setzero_ps[1] == +0.0f && v_mm256_setzero_ps[2] == +0.0f && v_mm256_setzero_ps[3] == +0.0f && v_mm256_setzero_ps[4] == +0.0f && v_mm256_setzero_ps[5] == +0.0f && v_mm256_setzero_ps[6] == +0.0f && v_mm256_setzero_ps[7] == +0.0f); - - constexpr __m256i v_mm256_setzero_si256 = _mm256_setzero_si256(); - static_assert(v_mm256_setzero_si256[0] == 0x0000000000000000ULL && v_mm256_setzero_si256[1] == 0x0000000000000000ULL && v_mm256_setzero_si256[2] == 0x0000000000000000ULL && v_mm256_setzero_si256[3] == 0x0000000000000000ULL); -} - -#endif diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 0b4f778a0637ab7..372790a8cd668be 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -4,6 +4,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s #include +#include "builtin_test_helpers.h" __m512d test_mm512_sqrt_pd(__m512d a) { @@ -10615,13 +10616,13 @@ __m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) { return _mm_maskz_cvtsd_ss(__U, __A, __B); } - __m512i test_mm512_setzero_epi32(void) { // CHECK-LABEL: test_mm512_setzero_epi32 // CHECK: zeroinitializer return _mm512_setzero_epi32(); } +TEST_CONSTEXPR(match_m512i(_mm512_setzero_epi32(), 0, 0, 0, 0, 0, 0, 0, 0)); __m512 test_mm512_setzero(void) { @@ -10629,6 +10630,7 @@ __m512 test_mm512_setzero(void) // CHECK: zeroinitializer 
return _mm512_setzero(); } +TEST_CONSTEXPR(match_m512(_mm512_setzero(), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f)); __m512i test_mm512_setzero_si512(void) { @@ -10636,6 +10638,7 @@ __m512i test_mm512_setzero_si512(void) // CHECK: zeroinitializer return _mm512_setzero_si512(); } +TEST_CONSTEXPR(match_m512i(_mm512_setzero_si512(), 0, 0, 0, 0, 0, 0, 0, 0)); __m512 test_mm512_setzero_ps(void) { @@ -10643,6 +10646,7 @@ __m512 test_mm512_setzero_ps(void) // CHECK: zeroinitializer return _mm512_setzero_ps(); } +TEST_CONSTEXPR(match_m512(_mm512_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f)); __m512d test_mm512_setzero_pd(void) { @@ -10650,6 +10654,7 @@ __m512d test_mm512_setzero_pd(void) // CHECK: zeroinitializer return _mm512_setzero_pd(); } +TEST_CONSTEXPR(match_m512d(_mm512_setzero_pd(), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)); __mmask16 test_mm512_int2mask(int __a) { @@ -10880,25 +10885,3 @@ void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i _ // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512 _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2); } - -// Test constexpr handling. 
-#if defined(__cplusplus) && (__cplusplus >= 201103L) - -void test_constexpr() { - constexpr __m512 v_mm512_setzero = _mm512_setzero(); - static_assert(v_mm512_setzero[0] == +0.0f && v_mm512_setzero[1] == +0.0f && v_mm512_setzero[2] == +0.0f && v_mm512_setzero[3] == +0.0f && v_mm512_setzero[4] == +0.0f && v_mm512_setzero[5] == +0.0f && v_mm512_setzero[6] == +0.0f && v_mm512_setzero[7] == +0.0f && v_mm512_setzero[8] == +0.0f && v_mm512_setzero[9] == +0.0f && v_mm512_setzero[10] == +0.0f && v_mm512_setzero[11] == +0.0f && v_mm512_setzero[12] == +0.0f && v_mm512_setzero[13] == +0.0f && v_mm512_setzero[14] == +0.0f && v_mm512_setzero[15] == +0.0f); - - constexpr __m512 v_mm512_setzero_ps = _mm512_setzero_ps(); - static_assert(v_mm512_setzero_ps[0] == +0.0f && v_mm512_setzero_ps[1] == +0.0f && v_mm512_setzero_ps[2] == +0.0f && v_mm512_setzero_ps[3] == +0.0f && v_mm512_setzero_ps[4] == +0.0f && v_mm512_setzero_ps[5] == +0.0f && v_mm512_setzero_ps[6] == +0.0f && v_mm512_setzero_ps[7] == +0.0f && v_mm512_setzero_ps[8] == +0.0f && v_mm512_setzero_ps[9] == +0.0f && v_mm512_setzero_ps[10] == +0.0f && v_mm512_setzero_ps[11] == +0.0f && v_mm512_setzero_ps[12] == +0.0f && v_mm512_setzero_ps[13] == +0.0f && v_mm512_setzero_ps[14] == +0.0f && v_mm512_setzero_ps[15] == +0.0f); - - constexpr __m512d v_mm512_setzero_pd = _mm512_setzero_pd(); - static_assert(v_mm512_setzero_pd[0] == +0.0 && v_mm512_setzero_pd[1] == +0.0 && v_mm512_setzero_pd[2] == +0.0 && v_mm512_setzero_pd[3] == +0.0 && v_mm512_setzero_pd[4] == +0.0 && v_mm512_setzero_pd[5] == +0.0 && v_mm512_setzero_pd[6] == +0.0 && v_mm512_setzero_pd[7] == +0.0); - - constexpr __m512i v_mm512_setzero_si512 = _mm512_setzero_si512(); - static_assert(v_mm512_setzero_si512[0] == 0x0000000000000000ULL && v_mm512_setzero_si512[1] == 0x0000000000000000ULL && v_mm512_setzero_si512[2] == 0x0000000000000000ULL && v_mm512_setzero_si512[3] == 0x0000000000000000ULL && v_mm512_setzero_si512[4] == 0x0000000000000000ULL && v_mm512_setzero_si512[5] 
== 0x0000000000000000ULL && v_mm512_setzero_si512[6] == 0x0000000000000000ULL && v_mm512_setzero_si512[7] == 0x0000000000000000ULL); - - constexpr __m512i v_mm512_setzero_epi32 = _mm512_setzero_epi32(); - static_assert(v_mm512_setzero_epi32[0] == 0x0000000000000000ULL && v_mm512_setzero_epi32[1] == 0x0000000000000000ULL && v_mm512_setzero_epi32[2] == 0x0000000000000000ULL && v_mm512_setzero_epi32[3] == 0x0000000000000000ULL && v_mm512_setzero_epi32[4] == 0x0000000000000000ULL && v_mm512_setzero_epi32[5] == 0x0000000000000000ULL && v_mm512_setzero_epi32[6] == 0x0000000000000000ULL && v_mm512_setzero_epi32[7] == 0x0000000000000000ULL); -} - -#endif diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h index 043b6ecbc69f18f..5e77ff3a7ca45e2 100644 --- a/clang/test/CodeGen/X86/builtin_test_helpers.h +++ b/clang/test/CodeGen/X86/builtin_test_helpers.h @@ -4,16 +4,40 @@ #if defined(__cplusplus) && (__cplusplus >= 201103L) -constexpr bool match_m128(__m128 v, float x, float y, float z, float w) { - return v[0] == x && v[1] == y && v[2] == z && v[3] == w; +constexpr bool match_m128(__m128 v, float a, float b, float c, float d) { + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; } -constexpr bool match_m128d(__m128d v, double x, double y) { - return v[0] == x && v[1] == y; +constexpr bool match_m128d(__m128d v, double a, double b) { + return v[0] == a && v[1] == b; } -constexpr bool match_m128i(__m128i v, unsigned long long x, unsigned long long y) { - return v[0] == x && v[1] == y; +constexpr bool match_m128i(__m128i v, unsigned long long a, unsigned long long b) { + return v[0] == a && v[1] == b; +} + +constexpr bool match_m256(__m256 v, float a, float b, float c, float d, float e, float f, float g, float h) { + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_m256d(__m256d v, double a, double b, double c, double d) { + 
return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + +constexpr bool match_m256i(__m256i v, unsigned long long a, unsigned long long b, unsigned long long c, unsigned long long d) { + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + +constexpr bool match_m512(__m512 v, float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p) { + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; +} + +constexpr bool match_m512d(__m512d v, double a, double b, double c, double d, double e, double f, double g, double h) { + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_m512i(__m512i v, unsigned long long a, unsigned long long b, unsigned long long c, unsigned long long d, unsigned long long e, unsigned long long f, unsigned long long g, unsigned long long h) { + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; } #define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__) @@ -22,4 +46,4 @@ constexpr bool match_m128i(__m128i v, unsigned long long x, unsigned long long y #define TEST_CONSTEXPR(...) 
-#endif \ No newline at end of file +#endif From e281d96a81bca896692da4a07ca1423ee6dc1f53 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Oct 2024 13:49:50 +0000 Subject: [PATCH 294/425] [clang][x86] Add constexpr support for _mm_add_epi32/64 and _mm_sub_epi32/64 --- clang/lib/Headers/emmintrin.h | 16 ++++++++-------- clang/test/CodeGen/X86/builtin_test_helpers.h | 9 +++++++++ clang/test/CodeGen/X86/sse2-builtins.c | 4 ++++ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index d6494762169b25b..778cdf99a129642 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -2110,8 +2110,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a + (__v4su)__b); } @@ -2147,8 +2147,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) { /// A 128-bit vector of [2 x i64]. /// \returns A 128-bit vector of [2 x i64] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a + (__v2du)__b); } @@ -2539,8 +2539,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a - (__v4su)__b); } @@ -2573,8 +2573,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) { /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a - (__v2du)__b); } diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h index 5e77ff3a7ca45e2..f6547d4cb29ca3d 100644 --- a/clang/test/CodeGen/X86/builtin_test_helpers.h +++ b/clang/test/CodeGen/X86/builtin_test_helpers.h @@ -16,6 +16,15 @@ constexpr bool match_m128i(__m128i v, unsigned long long a, unsigned long long b return v[0] == a && v[1] == b; } +constexpr bool match_v2di(__m128i v, long long a, long long b) { + return v[0] == a && v[1] == b; +} + +constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) { + __v4si v = (__v4si)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d; +} + constexpr bool match_m256(__m256 v, float a, float b, float c, float d, float e, float f, float g, float h) { return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; } diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 4287d3d4b5ec4eb..82aa7a2d2b49d11 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -32,12 +32,14 @@ __m128i test_mm_add_epi32(__m128i A, __m128i B) { // CHECK: add <4 x i32> return _mm_add_epi32(A, B); } 
+TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), -9, +6, +9, -8)); __m128i test_mm_add_epi64(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_add_epi64 // CHECK: add <2 x i64> return _mm_add_epi64(A, B); } +TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), -4, +5)); __m128d test_mm_add_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_add_pd @@ -1634,12 +1636,14 @@ __m128i test_mm_sub_epi32(__m128i A, __m128i B) { // CHECK: sub <4 x i32> return _mm_sub_epi32(A, B); } +TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), +11, -10, -3, 0)); __m128i test_mm_sub_epi64(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sub_epi64 // CHECK: sub <2 x i64> return _mm_sub_epi64(A, B); } +TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), +14, -11)); __m128d test_mm_sub_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_sub_pd From 872981bd236530b160bf788aafd1cbde7b2bfb30 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Tue, 29 Oct 2024 14:33:04 +0000 Subject: [PATCH 295/425] [Bazel] Fix layering for libc --- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 1956233590f728d..d4aeaea6fac845d 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -215,6 +215,7 @@ libc_support_library( libc_support_library( name = "types_mode_t", hdrs = ["hdr/types/mode_t.h"], + deps = [":hdr_fcntl_overlay"], ) libc_support_library( From b6a84e77b696b0d91b7cbed116d6454b6b1cc62b Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 29 Oct 2024 14:36:07 +0000 Subject: [PATCH 296/425] [AArch64] Add assembly/disassembly for FMOP4A (widening, 4-way) instructions 
(#113347) The new instructions are described in https://developer.arm.com/documentation/ddi0602/2024-09/SME-Instructions --- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 6 + llvm/lib/Target/AArch64/SMEInstrFormats.td | 35 +++++ .../fmop4a-fp8-fp32-widening-diagnostics.s | 120 ++++++++++++++++++ .../AArch64/SME2p2/fmop4a-fp8-fp32-widening.s | 93 ++++++++++++++ 4 files changed, 254 insertions(+) create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index ae40911cc62a877..b7165294288946d 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1041,3 +1041,9 @@ let Predicates = [HasSME2, HasSVEBFSCALE] in { defm BFMUL : sme2_bfmul_single<"bfmul">; defm BFMUL : sme2_bfmul_multi<"bfmul">; } //[HasSME2, HasSVEBFSCALE] + +let Uses = [FPMR, FPCR] in { +let Predicates = [HasSME2p2, HasSMEF8F32] in { + defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">; +} +} diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 330c540ffde4432..e7c90b0ed14e063 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5382,3 +5382,38 @@ multiclass sme2_fmop4as_fp16_non_widening { // Multiple vectors def _M2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; } + +class sme2_fp8_fp32_quarter_tile_outer_product + : I<(outs TileOp32:$ZAda), + (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000000001; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5-2} = 0b0000; + let Inst{1-0} = 
ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmop4a_fp8_fp32_4way { + // Single vectors + def _MZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>; +} diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s new file mode 100644 index 000000000000000..9a06192c0b30af4 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s @@ -0,0 +1,120 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f8f32 < %s 2>&1 | FileCheck %s + +// Single vectors + +fmop4a za0.d, z0.b, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.b, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b + +fmop4a za0.s, z15.b, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b + +fmop4a za0.s, z16.b, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b + +fmop4a za0.s, z0.b, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b + +fmop4a za0.s, z12.b, z17.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b + +fmop4a za0.s, 
z12.b, z14.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b + +fmop4a za0.s, z12.b, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b + +// Single and multiple vectors + +fmop4a za0.d, z0.b, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.b, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b + +fmop4a za0.s, z1.b, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b + +fmop4a za0.s, z16.b, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b + +fmop4a za0.s, z0.b, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.b, {z17.b-z18.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, z0.b, {z16.b-z18.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.b, {z12.b-z13.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4a za0.d, {z0.b-z1.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.b-z1.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.s-z1.b}, z16.b +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4a za0.s, {z1.b-z2.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.b-z2.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z16.b-z17.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.b-z1.b}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b + +fmop4a za0.s, {z0.b-z1.b}, z17.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b + +fmop4a za0.s, {z0.b-z1.b}, z12.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b + +// Multiple vectors + +fmop4a za0.d, {z0.b-z1.b}, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.b-z1.b}, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.s-z1.s}, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z1.b-z2.b}, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.b-z2.b}, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z18.b-z19.b}, {z16.b-z17.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors 
in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.b-z1.b}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.b-z1.b}, {z19.b-z20.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.b-z1.b}, {z18.b-z20.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.b-z1.b}, {z10.b-z11.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s new file mode 100644 index 000000000000000..9e378bcf3d75335 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s @@ -0,0 +1,93 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f8f32 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-f8f32 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f8f32 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f8f32 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// Single vectors + +fmop4a za0.s, z0.b, z16.b // 10000000-00100000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.b, z16.b +// CHECK-ENCODING: [0x00,0x00,0x20,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 80200000 + +fmop4a za1.s, z10.b, z20.b // 10000000-00100100-00000001-01000001 +// CHECK-INST: fmop4a za1.s, z10.b, z20.b +// CHECK-ENCODING: [0x41,0x01,0x24,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 80240141 + +fmop4a za3.s, z14.b, z30.b // 10000000-00101110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.b, z30.b +// CHECK-ENCODING: [0xc3,0x01,0x2e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 802e01c3 + +// Single and multiple vectors + +fmop4a za0.s, z0.b, {z16.b-z17.b} // 10000000-00110000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.b, { z16.b, z17.b } +// CHECK-ENCODING: [0x00,0x00,0x30,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 80300000 + +fmop4a za1.s, z10.b, {z20.b-z21.b} // 10000000-00110100-00000001-01000001 +// CHECK-INST: fmop4a za1.s, z10.b, { z20.b, z21.b } +// CHECK-ENCODING: [0x41,0x01,0x34,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 80340141 + +fmop4a za3.s, z14.b, {z30.b-z31.b} // 10000000-00111110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.b, { z30.b, z31.b } +// CHECK-ENCODING: [0xc3,0x01,0x3e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 803e01c3 + +// Multiple and single vectors + +fmop4a za0.s, {z0.b-z1.b}, z16.b // 10000000-00100000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.b, z1.b }, z16.b +// 
CHECK-ENCODING: [0x00,0x02,0x20,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 80200200 + +fmop4a za1.s, {z10.b-z11.b}, z20.b // 10000000-00100100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.b, z11.b }, z20.b +// CHECK-ENCODING: [0x41,0x03,0x24,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 80240341 + +fmop4a za3.s, {z14.b-z15.b}, z30.b // 10000000-00101110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.b, z15.b }, z30.b +// CHECK-ENCODING: [0xc3,0x03,0x2e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 802e03c3 + +// Multiple vectors + +fmop4a za0.s, {z0.b-z1.b}, {z16.b-z17.b} // 10000000-00110000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.b, z1.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x00,0x02,0x30,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 80300200 + +fmop4a za1.s, {z10.b-z11.b}, {z20.b-z21.b} // 10000000-00110100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x41,0x03,0x34,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 80340341 + +fmop4a za3.s, {z14.b-z15.b}, {z30.b-z31.b} // 10000000-00111110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.b, z15.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc3,0x03,0x3e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32 +// CHECK-UNKNOWN: 803e03c3 From a388df712700f38ad9a51d49a657a28e739f5eb4 Mon Sep 17 00:00:00 2001 From: Sebastian Kreutzer Date: Tue, 29 Oct 2024 15:40:53 +0100 Subject: [PATCH 297/425] [XRay] Remove reliance on default PIC behavior in DSO tests (#113892) Compiling with `-fxray-shared` requires position-independent code (introduced in #113548). Some tests do not explicitly specify this, thus falling back to the compiler default. If, for example, Clang is compiled with `-DCLANG_DEFAULT_PIE_ON_LINUX=OFF`, these checks fail. 
This patch addresses this issue in two tests: - Removing a check in `xray-shared.cpp` that only tests default PIC behavior - Adding `-fPIC` explicitly in `clang-xray-shared.cpp` --- clang/test/Driver/XRay/xray-shared.cpp | 1 - compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/test/Driver/XRay/xray-shared.cpp b/clang/test/Driver/XRay/xray-shared.cpp index 215854e1fc7cefe..e331fefed1e0c9f 100644 --- a/clang/test/Driver/XRay/xray-shared.cpp +++ b/clang/test/Driver/XRay/xray-shared.cpp @@ -1,6 +1,5 @@ // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fpic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s -// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-PIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-pic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC diff --git a/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp b/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp index 92f3c29e970d42c..0dd721571de9b8a 100644 --- a/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp @@ -1,6 +1,6 @@ // Test that the DSO-local runtime library has been linked if -fxray-shared is passed. 
// -// RUN: %clangxx -fxray-instrument -fxray-shared %s -shared -o %t.so +// RUN: %clangxx -fxray-instrument -fxray-shared -fPIC %s -shared -o %t.so // RUN: llvm-nm %t.so | FileCheck %s --check-prefix ENABLED // RUN: %clangxx -fxray-instrument %s -shared -o %t.so From 2e612f8d868b3fb88a44964a3d4efd61ee63e06a Mon Sep 17 00:00:00 2001 From: goldsteinn <35538541+goldsteinn@users.noreply.github.com> Date: Tue, 29 Oct 2024 07:41:59 -0700 Subject: [PATCH 298/425] [MLIR][Arith] Improve accuracy of `inferDivU` (#113789) 1) We can always bound the maximum with the numerator. - https://alive2.llvm.org/ce/z/PqHvuT 2) Even if denominator min can be zero, we can still bound the minimum result with `lhs.umin u/ rhs.umax`. This is similar to https://github.com/llvm/llvm-project/pull/110169 --- .../Interfaces/Utils/InferIntRangeCommon.cpp | 10 +++++++-- .../Dialect/Arith/int-range-interface.mlir | 21 +++++++++++++++---- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp index ec9ed87723e1cc5..a2acf3e732adab0 100644 --- a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp +++ b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp @@ -298,8 +298,14 @@ static ConstantIntRanges inferDivURange(const ConstantIntRanges &lhs, return minMaxBy(udiv, {lhsMin, lhsMax}, {rhsMin, rhsMax}, /*isSigned=*/false); } - // Otherwise, it's possible we might divide by 0. - return ConstantIntRanges::maxRange(rhsMin.getBitWidth()); + + APInt umin = APInt::getZero(rhsMin.getBitWidth()); + if (lhsMin.uge(rhsMax) && !rhsMax.isZero()) + umin = lhsMin.udiv(rhsMax); + + // X u/ Y u<= X. 
+ APInt umax = lhsMax; + return ConstantIntRanges::fromUnsigned(umin, umax); } ConstantIntRanges diff --git a/mlir/test/Dialect/Arith/int-range-interface.mlir b/mlir/test/Dialect/Arith/int-range-interface.mlir index 4b04229e5db52f0..6d66da2fc1eb35c 100644 --- a/mlir/test/Dialect/Arith/int-range-interface.mlir +++ b/mlir/test/Dialect/Arith/int-range-interface.mlir @@ -178,8 +178,8 @@ func.func @div_bounds_negative(%arg0 : index) -> i1 { } // CHECK-LABEL: func @div_zero_undefined -// CHECK: %[[ret:.*]] = arith.cmpi ule -// CHECK: return %[[ret]] +// CHECK: %[[true:.*]] = arith.constant true +// CHECK: return %[[true]] func.func @div_zero_undefined(%arg0 : index) -> i1 { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -190,6 +190,19 @@ func.func @div_zero_undefined(%arg0 : index) -> i1 { func.return %2 : i1 } +// CHECK-LABEL: func @div_refine_min +// CHECK: %[[true:.*]] = arith.constant true +// CHECK: return %[[true]] +func.func @div_refine_min(%arg0 : index) -> i1 { + %c0 = arith.constant 1 : index + %c1 = arith.constant 2 : index + %c4 = arith.constant 4 : index + %0 = arith.andi %arg0, %c1 : index + %1 = arith.divui %c4, %0 : index + %2 = arith.cmpi uge, %1, %c0 : index + func.return %2 : i1 +} + // CHECK-LABEL: func @ceil_divui // CHECK: %[[ret:.*]] = arith.cmpi eq // CHECK: return %[[ret]] @@ -271,13 +284,13 @@ func.func @remui_base(%arg0 : index, %arg1 : index ) -> i1 { // CHECK: return %[[true]] func.func @remui_base_maybe_zero(%arg0 : index, %arg1 : index ) -> i1 { %c4 = arith.constant 4 : index - %c5 = arith.constant 5 : index + %c5 = arith.constant 5 : index %0 = arith.minui %arg1, %c4 : index %1 = arith.remui %arg0, %0 : index %2 = arith.cmpi ult, %1, %c5 : index func.return %2 : i1 -} +} // CHECK-LABEL: func @remsi_base // CHECK: %[[ret:.*]] = arith.cmpi sge From 80a09735ac8bd6e31c824b41f7ee35952e440662 Mon Sep 17 00:00:00 2001 From: Elvina Yakubova Date: Tue, 29 Oct 2024 14:43:01 +0000 Subject: [PATCH 299/425] 
=?UTF-8?q?Revert=20"[clang][AArch64]=20Add=20getH?= =?UTF-8?q?ostCPUFeatures=20to=20query=20for=20enabled=20=E2=80=A6=20(#114?= =?UTF-8?q?066)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …features in cpu info (#97749)" This reverts commit d732c0b13c55259177f2936516b6087d634078e0. This is breaking buildbots https://lab.llvm.org/buildbot/#/builders/190/builds/8413, https://lab.llvm.org/buildbot/#/builders/56/builds/10880 and a few others. --- clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 20 +-- clang/test/Driver/Inputs/cpunative/cortex-a57 | 8 - clang/test/Driver/Inputs/cpunative/cortex-a72 | 8 - clang/test/Driver/Inputs/cpunative/cortex-a76 | 8 - .../test/Driver/Inputs/cpunative/neoverse-n1 | 8 - .../test/Driver/Inputs/cpunative/neoverse-v2 | 8 - clang/test/Driver/aarch64-mcpu-native.c | 138 ------------------ llvm/lib/TargetParser/Host.cpp | 10 +- 8 files changed, 10 insertions(+), 198 deletions(-) delete mode 100644 clang/test/Driver/Inputs/cpunative/cortex-a57 delete mode 100644 clang/test/Driver/Inputs/cpunative/cortex-a72 delete mode 100644 clang/test/Driver/Inputs/cpunative/cortex-a76 delete mode 100644 clang/test/Driver/Inputs/cpunative/neoverse-n1 delete mode 100644 clang/test/Driver/Inputs/cpunative/neoverse-v2 delete mode 100644 clang/test/Driver/aarch64-mcpu-native.c diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 1e2ac4e501bafd1..f083e40df131449 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -135,21 +135,15 @@ getAArch64ArchFeaturesFromMarch(const Driver &D, StringRef March, return true; } -static bool getAArch64ArchFeaturesFromMcpu( - const Driver &D, StringRef Mcpu, const ArgList &Args, - llvm::AArch64::ExtensionSet &Extensions, std::vector &Features) { +static bool +getAArch64ArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, + const ArgList &Args, + 
llvm::AArch64::ExtensionSet &Extensions) { StringRef CPU; std::string McpuLowerCase = Mcpu.lower(); if (!DecodeAArch64Mcpu(D, McpuLowerCase, CPU, Extensions)) return false; - if (Mcpu == "native") { - llvm::StringMap HostFeatures = llvm::sys::getHostCPUFeatures(); - for (auto &[Feature, Enabled] : HostFeatures) { - Features.push_back(Args.MakeArgString((Enabled ? "+" : "-") + Feature)); - } - } - return true; } @@ -216,11 +210,11 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Extensions); else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) - success = getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Extensions, - Features); + success = + getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Extensions); else if (isCPUDeterminedByTriple(Triple)) success = getAArch64ArchFeaturesFromMcpu( - D, getAArch64TargetCPU(Args, Triple, A), Args, Extensions, Features); + D, getAArch64TargetCPU(Args, Triple, A), Args, Extensions); else // Default to 'A' profile if the architecture is not specified. 
success = getAArch64ArchFeaturesFromMarch(D, "armv8-a", Args, Extensions); diff --git a/clang/test/Driver/Inputs/cpunative/cortex-a57 b/clang/test/Driver/Inputs/cpunative/cortex-a57 deleted file mode 100644 index e1903012ab79ccd..000000000000000 --- a/clang/test/Driver/Inputs/cpunative/cortex-a57 +++ /dev/null @@ -1,8 +0,0 @@ -processor : 0 -BogoMIPS : 200.00 -Features : fp asimd evtstrm crc32 cpuid -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x1 -CPU part : 0xd07 -CPU revision : 1 diff --git a/clang/test/Driver/Inputs/cpunative/cortex-a72 b/clang/test/Driver/Inputs/cpunative/cortex-a72 deleted file mode 100644 index 7aed4a6fa732369..000000000000000 --- a/clang/test/Driver/Inputs/cpunative/cortex-a72 +++ /dev/null @@ -1,8 +0,0 @@ -processor : 0 -BogoMIPS : 250.00 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 cpuid asimdrdm -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x0 -CPU part : 0xd08 -CPU revision : 2 diff --git a/clang/test/Driver/Inputs/cpunative/cortex-a76 b/clang/test/Driver/Inputs/cpunative/cortex-a76 deleted file mode 100644 index 21822cfcec60b0e..000000000000000 --- a/clang/test/Driver/Inputs/cpunative/cortex-a76 +++ /dev/null @@ -1,8 +0,0 @@ -processor : 0 -BogoMIPS : 500.00 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm ssbs jscvt fcma -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x1 -CPU part : 0xd0b -CPU revision : 2 diff --git a/clang/test/Driver/Inputs/cpunative/neoverse-n1 b/clang/test/Driver/Inputs/cpunative/neoverse-n1 deleted file mode 100644 index 571e8840b09f08a..000000000000000 --- a/clang/test/Driver/Inputs/cpunative/neoverse-n1 +++ /dev/null @@ -1,8 +0,0 @@ -processor : 0 -BogoMIPS : 50.00 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop asimddp ssbs -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x3 -CPU part : 0xd0c -CPU revision : 1 diff --git 
a/clang/test/Driver/Inputs/cpunative/neoverse-v2 b/clang/test/Driver/Inputs/cpunative/neoverse-v2 deleted file mode 100644 index c3c8433415d7a08..000000000000000 --- a/clang/test/Driver/Inputs/cpunative/neoverse-v2 +++ /dev/null @@ -1,8 +0,0 @@ -processor : 0 -BogoMIPS : 2000.00 -Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs sb paca pacg dcpodp sve2 sveaes svepmull svebitperm svesha3 svesm4 flagm2 frint svei8mm svebf16 i8mm bf16 dgh bti -CPU implementer : 0x41 -CPU architecture: 8 -CPU variant : 0x0 -CPU part : 0xd4f -CPU revision : 0 diff --git a/clang/test/Driver/aarch64-mcpu-native.c b/clang/test/Driver/aarch64-mcpu-native.c deleted file mode 100644 index f1d0ba76ad79c49..000000000000000 --- a/clang/test/Driver/aarch64-mcpu-native.c +++ /dev/null @@ -1,138 +0,0 @@ -// REQUIRES: aarch64-registered-target -// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/neoverse-v2 -// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-NV2 --implicit-check-not=FEAT_ %s - -// CHECK-FEAT-NV2: Extensions enabled for the given AArch64 target -// CHECK-FEAT-NV2-EMPTY: -// CHECK-FEAT-NV2: Architecture Feature(s) Description -// CHECK-FEAT-NV2: FEAT_AES, FEAT_PMULL Enable AES support -// CHECK-FEAT-NV2: FEAT_AMUv1 Enable Armv8.4-A Activity Monitors extension -// CHECK-FEAT-NV2: FEAT_AdvSIMD Enable Advanced SIMD instructions -// CHECK-FEAT-NV2: FEAT_BF16 Enable BFloat16 Extension -// CHECK-FEAT-NV2: FEAT_BTI Enable Branch Target Identification -// CHECK-FEAT-NV2: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets -// CHECK-FEAT-NV2: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions -// CHECK-FEAT-NV2: FEAT_CSV2_2 Enable architectural speculation restriction -// CHECK-FEAT-NV2: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions -// CHECK-FEAT-NV2: 
FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence -// CHECK-FEAT-NV2: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence -// CHECK-FEAT-NV2: FEAT_DotProd Enable dot product support -// CHECK-FEAT-NV2: FEAT_ETE Enable Embedded Trace Extension -// CHECK-FEAT-NV2: FEAT_FCMA Enable Armv8.3-A Floating-point complex number support -// CHECK-FEAT-NV2: FEAT_FHM Enable FP16 FML instructions -// CHECK-FEAT-NV2: FEAT_FP Enable Armv8.0-A Floating Point Extensions -// CHECK-FEAT-NV2: FEAT_FP16 Enable half-precision floating-point data processing -// CHECK-FEAT-NV2: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int -// CHECK-FEAT-NV2: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions -// CHECK-FEAT-NV2: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons -// CHECK-FEAT-NV2: FEAT_I8MM Enable Matrix Multiply Int8 Extension -// CHECK-FEAT-NV2: FEAT_JSCVT Enable Armv8.3-A JavaScript FP conversion instructions -// CHECK-FEAT-NV2: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension -// CHECK-FEAT-NV2: FEAT_LRCPC Enable support for RCPC extension -// CHECK-FEAT-NV2: FEAT_LRCPC2 Enable Armv8.4-A RCPC instructions with Immediate Offsets -// CHECK-FEAT-NV2: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions -// CHECK-FEAT-NV2: FEAT_LSE2 Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules -// CHECK-FEAT-NV2: FEAT_MPAM Enable Armv8.4-A Memory system Partitioning and Monitoring extension -// CHECK-FEAT-NV2: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension -// CHECK-FEAT-NV2: FEAT_NV, FEAT_NV2 Enable Armv8.4-A Nested Virtualization Enchancement -// CHECK-FEAT-NV2: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension -// CHECK-FEAT-NV2: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants -// CHECK-FEAT-NV2: FEAT_PAuth Enable Armv8.3-A Pointer Authentication 
extension -// CHECK-FEAT-NV2: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension -// CHECK-FEAT-NV2: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions -// CHECK-FEAT-NV2: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions -// CHECK-FEAT-NV2: FEAT_RNG Enable Random Number generation instructions -// CHECK-FEAT-NV2: FEAT_SB Enable Armv8.5-A Speculation Barrier -// CHECK-FEAT-NV2: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension -// CHECK-FEAT-NV2: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support -// CHECK-FEAT-NV2: FEAT_SPE Enable Statistical Profiling extension -// CHECK-FEAT-NV2: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions -// CHECK-FEAT-NV2: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit -// CHECK-FEAT-NV2: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions -// CHECK-FEAT-NV2: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions -// CHECK-FEAT-NV2: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions -// CHECK-FEAT-NV2: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions -// CHECK-FEAT-NV2: FEAT_TRBE Enable Trace Buffer Extension -// CHECK-FEAT-NV2: FEAT_TRF Enable Armv8.4-A Trace extension -// CHECK-FEAT-NV2: FEAT_UAO Enable Armv8.2-A UAO PState -// CHECK-FEAT-NV2: FEAT_VHE Enable Armv8.1-A Virtual Host extension - -// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/neoverse-n1 -// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-NN1 --implicit-check-not=FEAT_ %s - -// CHECK-FEAT-NN1: Extensions enabled for the given AArch64 target -// CHECK-FEAT-NN1-EMPTY: -// CHECK-FEAT-NN1: Architecture Feature(s) Description -// CHECK-FEAT-NN1: FEAT_AES, FEAT_PMULL Enable AES support -// CHECK-FEAT-NN1: FEAT_AdvSIMD Enable Advanced SIMD instructions -// CHECK-FEAT-NN1: FEAT_CRC32 
Enable Armv8.0-A CRC-32 checksum instructions -// CHECK-FEAT-NN1: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence -// CHECK-FEAT-NN1: FEAT_DotProd Enable dot product support -// CHECK-FEAT-NN1: FEAT_FP Enable Armv8.0-A Floating Point Extensions -// CHECK-FEAT-NN1: FEAT_FP16 Enable half-precision floating-point data processing -// CHECK-FEAT-NN1: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension -// CHECK-FEAT-NN1: FEAT_LRCPC Enable support for RCPC extension -// CHECK-FEAT-NN1: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions -// CHECK-FEAT-NN1: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension -// CHECK-FEAT-NN1: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants -// CHECK-FEAT-NN1: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension -// CHECK-FEAT-NN1: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions -// CHECK-FEAT-NN1: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions -// CHECK-FEAT-NN1: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support -// CHECK-FEAT-NN1: FEAT_SPE Enable Statistical Profiling extension -// CHECK-FEAT-NN1: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit -// CHECK-FEAT-NN1: FEAT_UAO Enable Armv8.2-A UAO PState -// CHECK-FEAT-NN1: FEAT_VHE Enable Armv8.1-A Virtual Host extension - - -// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/cortex-a57 -// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-CA57 --implicit-check-not=FEAT_ %s - -// CHECK-FEAT-CA57: Extensions enabled for the given AArch64 target -// CHECK-FEAT-CA57-EMPTY: -// CHECK-FEAT-CA57: Architecture Feature(s) Description -// CHECK-FEAT-CA57: FEAT_AES, FEAT_PMULL Enable AES support -// CHECK-FEAT-CA57: FEAT_AdvSIMD Enable Advanced SIMD instructions -// CHECK-FEAT-CA57: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions -// 
CHECK-FEAT-CA57: FEAT_FP Enable Armv8.0-A Floating Point Extensions -// CHECK-FEAT-CA57: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension -// CHECK-FEAT-CA57: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support - -// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/cortex-a72 -// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-CA72 --implicit-check-not=FEAT_ %s - -// CHECK-FEAT-CA72: Extensions enabled for the given AArch64 target -// CHECK-EMPTY: -// CHECK-FEAT-CA72: Architecture Feature(s) Description -// CHECK-FEAT-CA72: FEAT_AES, FEAT_PMULL Enable AES support -// CHECK-FEAT-CA72: FEAT_AdvSIMD Enable Advanced SIMD instructions -// CHECK-FEAT-CA72: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions -// CHECK-FEAT-CA72: FEAT_FP Enable Armv8.0-A Floating Point Extensions -// CHECK-FEAT-CA72: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension -// CHECK-FEAT-CA72: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support - -// RUN: export LLVM_CPUINFO=%S/Inputs/cpunative/cortex-a76 -// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=native | FileCheck --strict-whitespace --check-prefix=CHECK-FEAT-CA76 --implicit-check-not=FEAT_ %s - -// CHECK-FEAT-CA76: Extensions enabled for the given AArch64 target -// CHECK-FEAT-CA76-EMPTY: -// CHECK-FEAT-CA76: Architecture Feature(s) Description -// CHECK-FEAT-CA76: FEAT_AES, FEAT_PMULL Enable AES support -// CHECK-FEAT-CA76: FEAT_AdvSIMD Enable Advanced SIMD instructions -// CHECK-FEAT-CA76: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions -// CHECK-FEAT-CA76: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence -// CHECK-FEAT-CA76: FEAT_DotProd Enable dot product support -// CHECK-FEAT-CA76: FEAT_FP Enable Armv8.0-A Floating Point Extensions -// CHECK-FEAT-CA76: FEAT_FP16 Enable half-precision floating-point data processing -// CHECK-FEAT-CA76: FEAT_LOR Enable Armv8.1-A Limited 
Ordering Regions extension -// CHECK-FEAT-CA76: FEAT_LRCPC Enable support for RCPC extension -// CHECK-FEAT-CA76: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions -// CHECK-FEAT-CA76: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension -// CHECK-FEAT-CA76: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants -// CHECK-FEAT-CA76: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension -// CHECK-FEAT-CA76: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions -// CHECK-FEAT-CA76: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions -// CHECK-FEAT-CA76: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support -// CHECK-FEAT-CA76: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit -// CHECK-FEAT-CA76: FEAT_UAO Enable Armv8.2-A UAO PState -// CHECK-FEAT-CA76: FEAT_VHE Enable Armv8.1-A Virtual Host extension diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index de6c4edebba39a6..5c4e3a9dc52b0f9 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -68,15 +68,11 @@ using namespace llvm; static std::unique_ptr LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { - const char *CPUInfoFile = "/proc/cpuinfo"; - if (const char *CpuinfoIntercept = std::getenv("LLVM_CPUINFO")) - CPUInfoFile = CpuinfoIntercept; llvm::ErrorOr> Text = - llvm::MemoryBuffer::getFileAsStream(CPUInfoFile); - + llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); if (std::error_code EC = Text.getError()) { - llvm::errs() << "Can't read " << CPUInfoFile << ": " << EC.message() - << "\n"; + llvm::errs() << "Can't read " + << "/proc/cpuinfo: " << EC.message() << "\n"; return nullptr; } return std::move(*Text); From c9d9dc9c24039d85fdf3036368c9fba3d68722fa Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Oct 2024 10:48:18 -0400 Subject: [PATCH 300/425] [libc++] Remove _LIBCPP_ENABLE_ASSERTIONS, which had been deprecated 
(#113592) --- libcxx/CMakeLists.txt | 8 ++--- libcxx/docs/ReleaseNotes/20.rst | 11 ++---- libcxx/include/__config | 11 ++---- ...assertions_enables_extensive_mode.pass.cpp | 35 ------------------- 4 files changed, 7 insertions(+), 58 deletions(-) delete mode 100644 libcxx/test/libcxx/assertions/modes/enabling_assertions_enables_extensive_mode.pass.cpp diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 574b262018cd3a5..95a7d10f055ea70 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -45,10 +45,6 @@ include(CMakeDependentOption) include(HandleCompilerRT) # Basic options --------------------------------------------------------------- -option(LIBCXX_ENABLE_ASSERTIONS - "Enable assertions inside the compiled library, and at the same time make it the - default when compiling user code. Note that assertions can be enabled or disabled - by users in their own code regardless of this option." OFF) option(LIBCXX_ENABLE_SHARED "Build libc++ as a shared library." ON) option(LIBCXX_ENABLE_STATIC "Build libc++ as a static library." ON) option(LIBCXX_ENABLE_FILESYSTEM @@ -759,9 +755,9 @@ config_define_if_not(LIBCXX_ENABLE_WIDE_CHARACTERS _LIBCPP_HAS_NO_WIDE_CHARACTER config_define_if_not(LIBCXX_ENABLE_TIME_ZONE_DATABASE _LIBCPP_HAS_NO_TIME_ZONE_DATABASE) config_define_if_not(LIBCXX_ENABLE_VENDOR_AVAILABILITY_ANNOTATIONS _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# TODO: Remove in LLVM 21. We're leaving an error to make this fail explicitly. if (LIBCXX_ENABLE_ASSERTIONS) - message(DEPRECATION "LIBCXX_ENABLE_ASSERTIONS is deprecated and will be removed in LLVM 20. Please use LIBCXX_HARDENING_MODE instead.") - set(LIBCXX_HARDENING_MODE "extensive") + message(FATAL_ERROR "LIBCXX_ENABLE_ASSERTIONS has been removed. 
Please use LIBCXX_HARDENING_MODE instead.") endif() if (LIBCXX_HARDENING_MODE STREQUAL "none") config_define(2 _LIBCPP_HARDENING_MODE_DEFAULT) diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 84080e7cbafe2c4..38b8df3b2a7718b 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -64,8 +64,9 @@ Improvements and New Features Deprecations and Removals ------------------------- -- TODO: The ``LIBCXX_ENABLE_ASSERTIONS`` CMake variable and the ``_LIBCPP_ENABLE_ASSERTIONS`` macro that were used to - enable the safe mode will be removed in LLVM 20. +- The ``LIBCXX_ENABLE_ASSERTIONS`` CMake variable and the ``_LIBCPP_ENABLE_ASSERTIONS`` macro that were used to + enable the safe mode have been removed in LLVM 20. Please use :ref:`support for hardening ` + instead. - Support for the C++20 synchronization library (````, ````, ``atomic::wait``, etc.) has been removed in language modes prior to C++20. If you are using these features prior to C++20, you will need to @@ -91,12 +92,6 @@ Deprecations and Removals Upcoming Deprecations and Removals ---------------------------------- -LLVM 20 -~~~~~~~ - -- TODO - - LLVM 21 ~~~~~~~ diff --git a/libcxx/include/__config b/libcxx/include/__config index fc09a97274d7c78..1cf80a46686ab91 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -39,16 +39,9 @@ // HARDENING { -// This is for backward compatibility -- make enabling `_LIBCPP_ENABLE_ASSERTIONS` (which predates hardening modes) -// equivalent to setting the extensive mode. This is deprecated and will be removed in LLVM 20. +// TODO: Remove in LLVM 21. We're making this an error to catch folks who might not have migrated. 
# ifdef _LIBCPP_ENABLE_ASSERTIONS -# warning "_LIBCPP_ENABLE_ASSERTIONS is deprecated, please use _LIBCPP_HARDENING_MODE instead" -# if _LIBCPP_ENABLE_ASSERTIONS != 0 && _LIBCPP_ENABLE_ASSERTIONS != 1 -# error "_LIBCPP_ENABLE_ASSERTIONS must be set to 0 or 1" -# endif -# if _LIBCPP_ENABLE_ASSERTIONS -# define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_EXTENSIVE -# endif +# error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE instead" # endif // The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values: diff --git a/libcxx/test/libcxx/assertions/modes/enabling_assertions_enables_extensive_mode.pass.cpp b/libcxx/test/libcxx/assertions/modes/enabling_assertions_enables_extensive_mode.pass.cpp deleted file mode 100644 index c496fc32dc939f3..000000000000000 --- a/libcxx/test/libcxx/assertions/modes/enabling_assertions_enables_extensive_mode.pass.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// TODO(hardening): remove in LLVM 20. -// This test ensures that enabling assertions with the legacy `_LIBCPP_ENABLE_ASSERTIONS` now enables the extensive -// hardening mode. - -// `check_assertion.h` is only available starting from C++11 and requires Unix headers and regex support. -// REQUIRES: has-unix-headers -// UNSUPPORTED: c++03, no-localization -// The ability to set a custom abort message is required to compare the assertion message (which only happens in the -// debug mode). -// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing -// HWASAN replaces TRAP with abort or error exit code. 
-// XFAIL: hwasan -// Note that GCC doesn't support `-Wno-macro-redefined`. -// ADDITIONAL_COMPILE_FLAGS: -U_LIBCPP_HARDENING_MODE -D_LIBCPP_ENABLE_ASSERTIONS=1 -Wno-#warnings -Wno-cpp - -#include -#include "check_assertion.h" - -int main(int, char**) { - static_assert(_LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE, - "The extensive hardening mode should be implicitly enabled"); - - _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(true, "Should not fire"); - TEST_LIBCPP_ASSERT_FAILURE([] { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "Should fire"); }(), "Should fire"); - - return 0; -} From e268398fa89c9cc7901ea9b7386fc693023be203 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 29 Oct 2024 10:50:06 -0400 Subject: [PATCH 301/425] [NFC][AMDGPU] Use `!foreach` to replace explicit list of registers (#114005) --- llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 126 +++++--------------- 1 file changed, 29 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 21412044d5a0139..80969fce3d77fb5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -22,19 +22,13 @@ def CC_SI_Gfx : CallingConv<[ // 32 is reserved for the stack pointer // 33 is reserved for the frame pointer // 34 is reserved for the base pointer - CCIfInReg>>, - - CCIfNotInReg>>, + CCIfInReg("SGPR"#i)) // SGPR4-29 + >>>, + + CCIfNotInReg("VGPR"#i)) // VGPR0-31 + >>>, CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16], CCAssignToStack<4, 4>> ]>; @@ -43,93 +37,35 @@ def RetCC_SI_Gfx : CallingConv<[ CCIfType<[i1], CCPromoteToType>, CCIfType<[i1, i16], CCIfExtend>>, - CCIfNotInReg>>, + CCIfNotInReg("VGPR"#i)) // VGPR0-135 + >>>, ]>; def CC_SI_SHADER : CallingConv<[ CCIfType<[i1], CCPromoteToType>, - - CCIfInReg>>, + + CCIfInReg("SGPR"#i)) // SGPR0-43 + >>>, // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs. 
- CCIfNotInReg>> + CCIfNotInReg("VGPR"#i)) // VGPR0-135 + >>> ]>; def RetCC_SI_Shader : CallingConv<[ CCIfType<[i1, i16], CCIfExtend>>, - CCIfType<[i32, i16, v2i16] , CCAssignToReg<[ - SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7, - SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15, - SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23, - SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31, - SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39, - SGPR40, SGPR41, SGPR42, SGPR43 - ]>>, + CCIfType<[i32, i16, v2i16] , CCAssignToReg< + !foreach(i, !range(0, 44), !cast("SGPR"#i)) // SGPR0-43 + >>, // 32*4 + 4 is the minimum for a fetch shader with 32 outputs. - CCIfType<[f32, f16, v2f16, bf16, v2bf16] , CCAssignToReg<[ - VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, - VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, - VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, - VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31, - VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39, - VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47, - VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55, - VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63, - VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71, - VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79, - VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87, - VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95, - VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103, - VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111, - VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119, - VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127, - VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135 - ]>> + CCIfType<[f32, f16, v2f16, bf16, v2bf16] , CCAssignToReg< + 
!foreach(i, !range(0, 136), !cast("VGPR"#i)) // VGPR0-135 + >> ]>; def CSR_AMDGPU_VGPRs : CalleeSavedRegs< @@ -194,11 +130,9 @@ def CC_AMDGPU_Func : CallingConv<[ !foreach(i, !range(0, 30), !cast("SGPR"#i)) // SGPR0-29 >>>, - CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16], CCAssignToReg<[ - VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, - VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, - VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, - VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16], CCAssignToReg< + !foreach(i, !range(0, 32), !cast("VGPR"#i)) // VGPR0-31 + >>, CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16], CCAssignToStack<4, 4>> ]>; @@ -206,11 +140,9 @@ def CC_AMDGPU_Func : CallingConv<[ def RetCC_AMDGPU_Func : CallingConv<[ CCIfType<[i1], CCPromoteToType>, CCIfType<[i1, i16], CCIfExtend>>, - CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16], CCAssignToReg<[ - VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7, - VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15, - VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23, - VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>, + CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16], CCAssignToReg< + !foreach(i, !range(0, 32), !cast("VGPR"#i)) // VGPR0-31 + >>, ]>; def CC_AMDGPU : CallingConv<[ From 75e7ba8c0b7efe75632d328a80391b9086ba8740 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Tue, 29 Oct 2024 07:56:05 -0700 Subject: [PATCH 302/425] [HLSL] Re-implement countbits with the correct return type (#113189) Restricts hlsl countbits to always return a uint32. Implements a lowering from llvm.ctpop which has an overloaded return type to dxil cbits op which always returns uint32. 
Closes #112779 --- clang/lib/Headers/hlsl/hlsl_intrinsics.h | 124 +++++++++++------- .../test/CodeGenHLSL/builtins/countbits.hlsl | 62 ++++++--- .../SemaHLSL/BuiltIns/countbits-errors.hlsl | 14 +- llvm/lib/Target/DirectX/DXIL.td | 5 +- llvm/lib/Target/DirectX/DXILOpLowering.cpp | 70 ++++++++++ llvm/test/CodeGen/DirectX/countbits.ll | 46 ++++++- 6 files changed, 234 insertions(+), 87 deletions(-) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index 8ade4b27f360fbc..d9f3a17ea23d8e7 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -723,66 +723,88 @@ float4 cosh(float4); #ifdef __HLSL_ENABLE_16_BIT _HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int16_t countbits(int16_t); +const inline uint countbits(int16_t x) { + return __builtin_elementwise_popcount(x); +} _HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int16_t2 countbits(int16_t2); +const inline uint2 countbits(int16_t2 x) { + return __builtin_elementwise_popcount(x); +} _HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int16_t3 countbits(int16_t3); +const inline uint3 countbits(int16_t3 x) { + return __builtin_elementwise_popcount(x); +} _HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int16_t4 countbits(int16_t4); +const inline uint4 countbits(int16_t4 x) { + return __builtin_elementwise_popcount(x); +} _HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint16_t countbits(uint16_t); +const inline uint countbits(uint16_t x) { + return __builtin_elementwise_popcount(x); +} _HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint16_t2 countbits(uint16_t2); +const inline uint2 countbits(uint16_t2 x) { + return __builtin_elementwise_popcount(x); +} 
_HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint16_t3 countbits(uint16_t3); +const inline uint3 countbits(uint16_t3 x) { + return __builtin_elementwise_popcount(x); +} _HLSL_AVAILABILITY(shadermodel, 6.2) -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint16_t4 countbits(uint16_t4); +const inline uint4 countbits(uint16_t4 x) { + return __builtin_elementwise_popcount(x); +} #endif -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int countbits(int); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int2 countbits(int2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int3 countbits(int3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int4 countbits(int4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint countbits(uint); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint2 countbits(uint2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint3 countbits(uint3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint4 countbits(uint4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int64_t countbits(int64_t); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int64_t2 countbits(int64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int64_t3 countbits(int64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -int64_t4 countbits(int64_t4); - -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint64_t countbits(uint64_t); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint64_t2 countbits(uint64_t2); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint64_t3 countbits(uint64_t3); -_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount) -uint64_t4 countbits(uint64_t4); +const inline uint countbits(int x) { return __builtin_elementwise_popcount(x); } +const inline uint2 countbits(int2 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint3 countbits(int3 x) { + return __builtin_elementwise_popcount(x); +} +const 
inline uint4 countbits(int4 x) { + return __builtin_elementwise_popcount(x); +} + +const inline uint countbits(uint x) { + return __builtin_elementwise_popcount(x); +} +const inline uint2 countbits(uint2 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint3 countbits(uint3 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint4 countbits(uint4 x) { + return __builtin_elementwise_popcount(x); +} + +const inline uint countbits(int64_t x) { + return __builtin_elementwise_popcount(x); +} +const inline uint2 countbits(int64_t2 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint3 countbits(int64_t3 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint4 countbits(int64_t4 x) { + return __builtin_elementwise_popcount(x); +} + +const inline uint countbits(uint64_t x) { + return __builtin_elementwise_popcount(x); +} +const inline uint2 countbits(uint64_t2 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint3 countbits(uint64_t3 x) { + return __builtin_elementwise_popcount(x); +} +const inline uint4 countbits(uint64_t4 x) { + return __builtin_elementwise_popcount(x); +} //===----------------------------------------------------------------------===// // degrees builtins diff --git a/clang/test/CodeGenHLSL/builtins/countbits.hlsl b/clang/test/CodeGenHLSL/builtins/countbits.hlsl index 8dfe977bfae6269..218d8dcd10f8d70 100644 --- a/clang/test/CodeGenHLSL/builtins/countbits.hlsl +++ b/clang/test/CodeGenHLSL/builtins/countbits.hlsl @@ -4,26 +4,37 @@ #ifdef __HLSL_ENABLE_16_BIT // CHECK-LABEL: test_countbits_ushort -// CHECK: call i16 @llvm.ctpop.i16 -uint16_t test_countbits_ushort(uint16_t p0) +// CHECK: [[A:%.*]] = call i16 @llvm.ctpop.i16 +// CHECK-NEXT: zext i16 [[A]] to i32 +uint test_countbits_ushort(uint16_t p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_short +// CHECK: [[A:%.*]] = call i16 @llvm.ctpop.i16 +// CHECK-NEXT: sext i16 [[A]] to i32 +uint 
test_countbits_short(int16_t p0) { return countbits(p0); } // CHECK-LABEL: test_countbits_ushort2 -// CHECK: call <2 x i16> @llvm.ctpop.v2i16 -uint16_t2 test_countbits_ushort2(uint16_t2 p0) +// CHECK: [[A:%.*]] = call <2 x i16> @llvm.ctpop.v2i16 +// CHECK-NEXT: zext <2 x i16> [[A]] to <2 x i32> +uint2 test_countbits_ushort2(uint16_t2 p0) { return countbits(p0); } // CHECK-LABEL: test_countbits_ushort3 -// CHECK: call <3 x i16> @llvm.ctpop.v3i16 -uint16_t3 test_countbits_ushort3(uint16_t3 p0) +// CHECK: [[A:%.*]] = call <3 x i16> @llvm.ctpop.v3i16 +// CHECK-NEXT: zext <3 x i16> [[A]] to <3 x i32> +uint3 test_countbits_ushort3(uint16_t3 p0) { return countbits(p0); } // CHECK-LABEL: test_countbits_ushort4 -// CHECK: call <4 x i16> @llvm.ctpop.v4i16 -uint16_t4 test_countbits_ushort4(uint16_t4 p0) +// CHECK: [[A:%.*]] = call <4 x i16> @llvm.ctpop.v4i16 +// CHECK-NEXT: zext <4 x i16> [[A]] to <4 x i32> +uint4 test_countbits_ushort4(uint16_t4 p0) { return countbits(p0); } @@ -31,7 +42,13 @@ uint16_t4 test_countbits_ushort4(uint16_t4 p0) // CHECK-LABEL: test_countbits_uint // CHECK: call i32 @llvm.ctpop.i32 -int test_countbits_uint(uint p0) +uint test_countbits_uint(uint p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_int +// CHECK: call i32 @llvm.ctpop.i32 +uint test_countbits_int(int p0) { return countbits(p0); } @@ -55,26 +72,37 @@ uint4 test_countbits_uint4(uint4 p0) } // CHECK-LABEL: test_countbits_long -// CHECK: call i64 @llvm.ctpop.i64 -uint64_t test_countbits_long(uint64_t p0) +// CHECK: [[A:%.*]] = call i64 @llvm.ctpop.i64 +// CHECK-NEXT: trunc i64 [[A]] to i32 +uint test_countbits_long(uint64_t p0) +{ + return countbits(p0); +} +// CHECK-LABEL: test_countbits_slong +// CHECK: [[A:%.*]] = call i64 @llvm.ctpop.i64 +// CHECK-NEXT: trunc i64 [[A]] to i32 +uint test_countbits_slong(int64_t p0) { return countbits(p0); } // CHECK-LABEL: test_countbits_long2 -// CHECK: call <2 x i64> @llvm.ctpop.v2i64 -uint64_t2 test_countbits_long2(uint64_t2 p0) +// 
CHECK: [[A:%.*]] = call <2 x i64> @llvm.ctpop.v2i64 +// CHECK-NEXT: trunc <2 x i64> [[A]] to <2 x i32> +uint2 test_countbits_long2(uint64_t2 p0) { return countbits(p0); } // CHECK-LABEL: test_countbits_long3 -// CHECK: call <3 x i64> @llvm.ctpop.v3i64 -uint64_t3 test_countbits_long3(uint64_t3 p0) +// CHECK: [[A:%.*]] = call <3 x i64> @llvm.ctpop.v3i64 +// CHECK-NEXT: trunc <3 x i64> [[A]] to <3 x i32> +uint3 test_countbits_long3(uint64_t3 p0) { return countbits(p0); } // CHECK-LABEL: test_countbits_long4 -// CHECK: call <4 x i64> @llvm.ctpop.v4i64 -uint64_t4 test_countbits_long4(uint64_t4 p0) +// CHECK: [[A:%.*]] = call <4 x i64> @llvm.ctpop.v4i64 +// CHECK-NEXT: trunc <4 x i64> [[A]] to <4 x i32> +uint4 test_countbits_long4(uint64_t4 p0) { return countbits(p0); } diff --git a/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl index 8d5f0abb2860f8e..5704165e1a45053 100644 --- a/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl @@ -1,6 +1,4 @@ -// RUN: %clang_cc1 -finclude-default-header -// -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -// -disable-llvm-passes -verify -verify-ignore-unexpected +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected double test_int_builtin(double p0) { @@ -9,13 +7,11 @@ double test_int_builtin(double p0) { } double2 test_int_builtin_2(double2 p0) { - return __builtin_elementwise_popcount(p0); - // expected-error@-1 {{1st argument must be a vector of integers - // (was 'double2' (aka 'vector'))}} + return countbits(p0); + // expected-error@-1 {{call to 'countbits' is ambiguous}} } double test_int_builtin_3(float p0) { - return __builtin_elementwise_popcount(p0); - // expected-error@-1 {{1st argument must be a vector of integers - // (was 'float')}} + return countbits(p0); + 
// expected-error@-1 {{call to 'countbits' is ambiguous}} } diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 68ae5de06423c2a..1e8dc63ffa257e1 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -554,11 +554,10 @@ def Rbits : DXILOp<30, unary> { let attributes = [Attributes]; } -def CBits : DXILOp<31, unary> { +def CountBits : DXILOp<31, unaryBits> { let Doc = "Returns the number of 1 bits in the specified value."; - let LLVMIntrinsic = int_ctpop; let arguments = [OverloadTy]; - let result = OverloadTy; + let result = Int32Ty; let overloads = [Overloads]; let stages = [Stages]; diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index f7722d77074764d..8acc9c1efa08c08 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -505,6 +505,73 @@ class OpLowerer { }); } + [[nodiscard]] bool lowerCtpopToCountBits(Function &F) { + IRBuilder<> &IRB = OpBuilder.getIRB(); + Type *Int32Ty = IRB.getInt32Ty(); + + return replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + SmallVector Args; + Args.append(CI->arg_begin(), CI->arg_end()); + + Type *RetTy = Int32Ty; + Type *FRT = F.getReturnType(); + if (const auto *VT = dyn_cast(FRT)) + RetTy = VectorType::get(RetTy, VT); + + Expected OpCall = OpBuilder.tryCreateOp( + dxil::OpCode::CountBits, Args, CI->getName(), RetTy); + if (Error E = OpCall.takeError()) + return E; + + // If the result type is 32 bits we can do a direct replacement. 
+ if (FRT->isIntOrIntVectorTy(32)) { + CI->replaceAllUsesWith(*OpCall); + CI->eraseFromParent(); + return Error::success(); + } + + unsigned CastOp; + unsigned CastOp2; + if (FRT->isIntOrIntVectorTy(16)) { + CastOp = Instruction::ZExt; + CastOp2 = Instruction::SExt; + } else { // must be 64 bits + assert(FRT->isIntOrIntVectorTy(64) && + "Currently only lowering 16, 32, or 64 bit ctpop to CountBits \ + is supported."); + CastOp = Instruction::Trunc; + CastOp2 = Instruction::Trunc; + } + + // It is correct to replace the ctpop with the dxil op and + // remove all casts to i32 + bool NeedsCast = false; + for (User *User : make_early_inc_range(CI->users())) { + Instruction *I = dyn_cast(User); + if (I && (I->getOpcode() == CastOp || I->getOpcode() == CastOp2) && + I->getType() == RetTy) { + I->replaceAllUsesWith(*OpCall); + I->eraseFromParent(); + } else + NeedsCast = true; + } + + // It is correct to replace a ctpop with the dxil op and + // a cast from i32 to the return type of the ctpop + // the cast is emitted here if there is a non-cast to i32 + // instr which uses the ctpop + if (NeedsCast) { + Value *Cast = + IRB.CreateZExtOrTrunc(*OpCall, F.getReturnType(), "ctpop.cast"); + CI->replaceAllUsesWith(Cast); + } + + CI->eraseFromParent(); + return Error::success(); + }); + } + bool lowerIntrinsics() { bool Updated = false; bool HasErrors = false; @@ -543,6 +610,9 @@ class OpLowerer { return replaceSplitDoubleCallUsages(CI, Op); }); break; + case Intrinsic::ctpop: + HasErrors |= lowerCtpopToCountBits(F); + break; } Updated = true; } diff --git a/llvm/test/CodeGen/DirectX/countbits.ll b/llvm/test/CodeGen/DirectX/countbits.ll index c6bc2b6790948ee..f03ab9c5e79c354 100644 --- a/llvm/test/CodeGen/DirectX/countbits.ll +++ b/llvm/test/CodeGen/DirectX/countbits.ll @@ -4,35 +4,67 @@ define noundef i16 @test_countbits_short(i16 noundef %a) { entry: -; CHECK: call i16 @dx.op.unary.i16(i32 31, i16 %{{.*}}) +; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) 
+; CHECK-NEXT: [[B:%.*]] = trunc i32 [[A]] to i16 +; CHECK-NEXT ret i16 [[B]] %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a) ret i16 %elt.ctpop } +define noundef i32 @test_countbits_short2(i16 noundef %a) { +entry: +; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) +; CHECK-NEXT: ret i32 [[A]] + %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a) + %elt.zext = zext i16 %elt.ctpop to i32 + ret i32 %elt.zext +} + +define noundef i32 @test_countbits_short3(i16 noundef %a) { +entry: +; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}}) +; CHECK-NEXT: ret i32 [[A]] + %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a) + %elt.sext = sext i16 %elt.ctpop to i32 + ret i32 %elt.sext +} + define noundef i32 @test_countbits_int(i32 noundef %a) { entry: -; CHECK: call i32 @dx.op.unary.i32(i32 31, i32 %{{.*}}) +; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 %{{.*}}) +; CHECK-NEXT: ret i32 [[A]] %elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a) ret i32 %elt.ctpop } define noundef i64 @test_countbits_long(i64 noundef %a) { entry: -; CHECK: call i64 @dx.op.unary.i64(i32 31, i64 %{{.*}}) +; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}}) +; CHECK-NEXT: [[B:%.*]] = zext i32 [[A]] to i64 +; CHECK-NEXT ret i64 [[B]] %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %elt.ctpop } +define noundef i32 @test_countbits_long2(i64 noundef %a) { +entry: +; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}}) +; CHECK-NEXT: ret i32 [[A]] + %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a) + %elt.trunc = trunc i64 %elt.ctpop to i32 + ret i32 %elt.trunc +} + define noundef <4 x i32> @countbits_vec4_i32(<4 x i32> noundef %a) { entry: ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0 - ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee0]]) + ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee0]]) ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1 - ; CHECK: [[ie1:%.*]] = 
call i32 @dx.op.unary.i32(i32 31, i32 [[ee1]]) + ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee1]]) ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2 - ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee2]]) + ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee2]]) ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3 - ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee3]]) + ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee3]]) ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0 ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1 ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2 From a156362e93eba9513611dc0989d516e9946cae48 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 29 Oct 2024 14:59:37 +0000 Subject: [PATCH 303/425] [AMDGPU] Fix machine verification failure after SIFoldOperandsImpl::tryFoldOMod (#113544) Fixes #54201 --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 3 ++ llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir | 50 ++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index c912a580854c1c4..f0c7837e0bb75ac 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1793,6 +1793,9 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) { DefOMod->setImm(OMod); MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg()); + // Kill flags can be wrong if we replaced a def inside a loop with a def + // outside the loop. 
+ MRI->clearKillFlags(Def->getOperand(0).getReg()); MI.eraseFromParent(); // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac diff --git a/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir new file mode 100644 index 000000000000000..8065e2cfc004322 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir @@ -0,0 +1,50 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands %s -verify-machineinstrs -o - | FileCheck %s -check-prefix=GFX9 + +# When V_ADD_F32 is replaced with an output modifier on V_RSQ_F32, check that +# the kill flag is cleared on the use of %4 in V_MUL_F32. +--- +name: main +tracksRegLiveness: true +machineFunctionInfo: + mode: + ieee: false + fp32-input-denormals: false + fp32-output-denormals: false +body: | + ; GFX9-LABEL: name: main + ; GFX9: bb.0: + ; GFX9-NEXT: successors: %bb.1(0x80000000) + ; GFX9-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; GFX9-NEXT: [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef [[DEF]], 0, 1, implicit $mode, implicit $exec + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: bb.1: + ; GFX9-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-NEXT: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, killed undef [[DEF2]], 0, [[V_RSQ_F32_e64_]], 0, 0, implicit $mode, implicit $exec + ; GFX9-NEXT: SI_LOOP undef [[DEF1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX9-NEXT: S_BRANCH %bb.2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: bb.2: + ; GFX9-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, 
$vgpr8, $vgpr9 + + %0:vgpr_32 = IMPLICIT_DEF + %1:sreg_64 = IMPLICIT_DEF + %2:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef %0, 0, 0, implicit $mode, implicit $exec + + bb.1: + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = nsz reassoc nofpexcept V_ADD_F32_e64 0, undef %2, 0, undef %2, 0, 0, implicit $mode, implicit $exec + %5:vgpr_32 = V_MUL_F32_e64 0, killed undef %3, 0, killed %4, 0, 0, implicit $mode, implicit $exec + SI_LOOP undef %1, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... From 1e991b1021c1d7694e1a0dfe9e261fb27555f05f Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 29 Oct 2024 11:06:18 -0400 Subject: [PATCH 304/425] Nominate Corentin Jabot for lambdas (#114043) Corentin has largely been handling reviews touching lambdas for the past year or two, so he has significant understanding of the various moving parts of this fairly substantial C++ feature. Given that work on lambdas tends to be somewhat specialized, I think it makes sense for it to have dedicated oversight. --- clang/Maintainers.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 54690452681a608..39f46457e676a80 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -78,6 +78,12 @@ Templates | ekeane\@nvidia.com (email), ErichKeane (Phabricator), erichkeane (GitHub) +Lambdas +~~~~~~~ +| Corentin Jabot +| corentin.jabot\@gmail.com (email), cor3ntin (Phabricator), cor3ntin (GitHub) + + Debug information ~~~~~~~~~~~~~~~~~ | Adrian Prantl From d43e4ce77d0a314139655c9cf7c3b533b5b72440 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 29 Oct 2024 11:17:11 -0400 Subject: [PATCH 305/425] Revert "[gn] port b1be21394e9c" b1be21394e9c was reverted in 3ac75ee8ec. This reverts commit 18f4b7e4a862c11816e62cc72fb2a4ca8fac1987, as well as follow-ups a69d2a18d207947a25838dd01d2116bee384b75b and 4a6b56960f445d111adc9aef799acad8c6ca41f0. 
--- llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn index 020f3e7d9acd7bb..5fbda794ff176ef 100644 --- a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn @@ -55,8 +55,6 @@ write_cmake_config("lit_common_configured") { "COMPILER_RT_ENABLE_INTERNAL_SYMBOLIZER_PYBOOL=False", "COMPILER_RT_HAS_NO_DEFAULT_CONFIG_FLAG_PYBOOL=True", "COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL=False", - "COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR=" + - rebase_path("$root_build_dir/bin"), "COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR=" + rebase_path(crt_current_out_dir), "COMPILER_RT_RESOLVED_OUTPUT_DIR=" + rebase_path(crt_current_out_dir), From f906d765baa0a17519b6d3310ba32e1b51b88c6d Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 29 Oct 2024 15:18:08 +0000 Subject: [PATCH 306/425] [gn build] Port 5ea694816b56 --- .../unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn index 44640c6527c903b..97df71c6279efde 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn @@ -14,5 +14,6 @@ unittest("SandboxVectorizerTests") { "IntervalTest.cpp", "LegalityTest.cpp", "SchedulerTest.cpp", + "VecUtilsTest.cpp", ] } From af44976cad04d8470f205f557eaf172ee1eff0df Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 29 Oct 2024 15:18:09 +0000 Subject: [PATCH 307/425] [gn build] Port 6128ff663076 --- .../secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn index 6f52677cb83338e..7f74b335e30ed99 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn @@ -17,8 +17,9 @@ unittest("JITLinkTests") { "AArch32ErrorTests.cpp", "AArch32Tests.cpp", "EHFrameSupportTests.cpp", - "JITLinkMocks.cpp", + "JITLinkTestUtils.cpp", "LinkGraphTests.cpp", + "MachOLinkGraphTests.cpp", "MemoryManagerErrorTests.cpp", "StubsTests.cpp", ] From bf6c483e4714841b1511ea3666f05a468bd988fe Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 29 Oct 2024 15:15:38 +0000 Subject: [PATCH 308/425] [clang][x86] Add constexpr support for SSE2 _mm_set*_epi* intrinsics --- clang/lib/Headers/emmintrin.h | 46 ++++++++++--------- clang/test/CodeGen/X86/builtin_test_helpers.h | 10 ++++ clang/test/CodeGen/X86/sse2-builtins.c | 14 ++++++ 3 files changed, 49 insertions(+), 21 deletions(-) diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 778cdf99a129642..4f00b7f1a8d9d4e 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -3512,8 +3512,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) { /// destination vector of [2 x i64]. /// \returns An initialized 128-bit vector of [2 x i64] containing the values /// provided in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, - long long __q0) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set_epi64x(long long __q1, long long __q0) { return __extension__(__m128i)(__v2di){__q0, __q1}; } @@ -3533,9 +3533,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, /// destination vector of [2 x i64]. /// \returns An initialized 128-bit vector of [2 x i64] containing the values /// provided in the operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, - __m64 __q0) { - return _mm_set_epi64x((long long)__q1, (long long)__q0); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set_epi64(__m64 __q1, __m64 __q0) { + return _mm_set_epi64x((long long)__q1[0], (long long)__q0[0]); } /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with @@ -3560,8 +3560,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, /// vector. /// \returns An initialized 128-bit vector of [4 x i32] containing the values /// provided in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, - int __i1, int __i0) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi32(int __i3, + int __i2, + int __i1, + int __i0) { return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3}; } @@ -3599,7 +3601,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, /// vector. /// \returns An initialized 128-bit vector of [8 x i16] containing the values /// provided in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0) { return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3, @@ -3648,7 +3650,7 @@ _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, /// Initializes bits [7:0] of the destination vector. /// \returns An initialized 128-bit vector of [16 x i8] containing the values /// provided in the operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { @@ -3670,7 +3672,8 @@ _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, /// vector. /// \returns An initialized 128-bit integer vector of [2 x i64] with both /// elements containing the value provided in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set1_epi64x(long long __q) { return _mm_set_epi64x(__q, __q); } @@ -3687,7 +3690,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { /// vector. /// \returns An initialized 128-bit vector of [2 x i64] with all elements /// containing the value provided in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set1_epi64(__m64 __q) { return _mm_set_epi64(__q, __q); } @@ -3704,7 +3708,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { /// vector. /// \returns An initialized 128-bit vector of [4 x i32] with all elements /// containing the value provided in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i) { return _mm_set_epi32(__i, __i, __i, __i); } @@ -3721,7 +3725,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { /// vector. /// \returns An initialized 128-bit vector of [8 x i16] with all elements /// containing the value provided in the operand. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set1_epi16(short __w) { return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); } @@ -3738,7 +3743,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { /// vector. /// \returns An initialized 128-bit vector of [16 x i8] with all elements /// containing the value provided in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b) { return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); } @@ -3757,8 +3762,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { /// A 64-bit integral value used to initialize the upper 64 bits of the /// result. /// \returns An initialized 128-bit integer vector. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, - __m64 __q1) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_setr_epi64(__m64 __q0, __m64 __q1) { return _mm_set_epi64(__q1, __q0); } @@ -3779,9 +3784,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, /// \param __i3 /// A 32-bit integral value used to initialize bits [127:96] of the result. /// \returns An initialized 128-bit integer vector. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, - int __i2, - int __i3) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) { return _mm_set_epi32(__i3, __i2, __i1, __i0); } @@ -3810,7 +3814,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, /// \param __w7 /// A 16-bit integral value used to initialize bits [127:112] of the result. /// \returns An initialized 128-bit integer vector. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7) { return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); @@ -3857,7 +3861,7 @@ _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, /// \param __b15 /// An 8-bit integral value used to initialize bits [127:120] of the result. /// \returns An initialized 128-bit integer vector. -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15) { diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h index f6547d4cb29ca3d..01800db33afbb9b 100644 --- a/clang/test/CodeGen/X86/builtin_test_helpers.h +++ b/clang/test/CodeGen/X86/builtin_test_helpers.h @@ -25,6 +25,16 @@ constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) { return v[0] == a && v[1] == b && v[2] == c && v[3] == d; } +constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, short e, short f, short g, short h) { + __v8hi v = (__v8hi)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; +} + +constexpr bool match_v16qi(__m128i _v, char a, char b, char c, char d, char e, char f, char g, char h, char i, char j, char k, char l, char m, char n, char o, char p) { + __v16qi v = (__v16qi)_v; + return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p; +} + constexpr bool match_m256(__m256 v, float a, float b, float c, float d, float e, float f, 
float g, float h) { return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h; } diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 82aa7a2d2b49d11..c4493a49120543a 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1013,6 +1013,7 @@ __m128i test_mm_set_epi8(char A, char B, char C, char D, // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P); } +TEST_CONSTEXPR(match_v16qi(_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); __m128i test_mm_set_epi16(short A, short B, short C, short D, short E, short F, short G, short H) { @@ -1027,6 +1028,7 @@ __m128i test_mm_set_epi16(short A, short B, short C, short D, // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 return _mm_set_epi16(A, B, C, D, E, F, G, H); } +TEST_CONSTEXPR(match_v8hi(_mm_set_epi16(0, -1, -2, -3, -4, -5, -6, -7), -7, -6, -5, -4, -3, -2, -1, 0)); __m128i test_mm_set_epi32(int A, int B, int C, int D) { // CHECK-LABEL: test_mm_set_epi32 @@ -1036,6 +1038,7 @@ __m128i test_mm_set_epi32(int A, int B, int C, int D) { // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 return _mm_set_epi32(A, B, C, D); } +TEST_CONSTEXPR(match_v4si(_mm_set_epi32(1, -3, 5, -7), -7, 5, -3, 1)); __m128i test_mm_set_epi64(__m64 A, __m64 B) { // CHECK-LABEL: test_mm_set_epi64 @@ -1043,6 +1046,7 @@ __m128i test_mm_set_epi64(__m64 A, __m64 B) { // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 return _mm_set_epi64(A, B); } +TEST_CONSTEXPR(match_v2di(_mm_set_epi64((__m64){-1}, (__m64){42}), 42, -1)); __m128i test_mm_set_epi64x(long long A, long long B) { // CHECK-LABEL: test_mm_set_epi64x @@ -1050,6 +1054,7 @@ __m128i test_mm_set_epi64x(long long A, long long B) { // CHECK: insertelement <2 x i64> %{{.*}}, i64 
%{{.*}}, i32 1 return _mm_set_epi64x(A, B); } +TEST_CONSTEXPR(match_v2di(_mm_set_epi64x(100, -1000), -1000, 100)); __m128d test_mm_set_pd(double A, double B) { // CHECK-LABEL: test_mm_set_pd @@ -1095,6 +1100,7 @@ __m128i test_mm_set1_epi8(char A) { // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 return _mm_set1_epi8(A); } +TEST_CONSTEXPR(match_v16qi(_mm_set1_epi8(99), 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99)); __m128i test_mm_set1_epi16(short A) { // CHECK-LABEL: test_mm_set1_epi16 @@ -1108,6 +1114,7 @@ __m128i test_mm_set1_epi16(short A) { // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 return _mm_set1_epi16(A); } +TEST_CONSTEXPR(match_v8hi(_mm_set1_epi16(-128), -128, -128, -128, -128, -128, -128, -128, -128)); __m128i test_mm_set1_epi32(int A) { // CHECK-LABEL: test_mm_set1_epi32 @@ -1117,6 +1124,7 @@ __m128i test_mm_set1_epi32(int A) { // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 return _mm_set1_epi32(A); } +TEST_CONSTEXPR(match_v4si(_mm_set1_epi32(55), 55, 55, 55, 55)); __m128i test_mm_set1_epi64(__m64 A) { // CHECK-LABEL: test_mm_set1_epi64 @@ -1124,6 +1132,7 @@ __m128i test_mm_set1_epi64(__m64 A) { // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 return _mm_set1_epi64(A); } +TEST_CONSTEXPR(match_v2di(_mm_set1_epi64((__m64){-65535}), -65535, -65535)); __m128i test_mm_set1_epi64x(long long A) { // CHECK-LABEL: test_mm_set1_epi64x @@ -1131,6 +1140,7 @@ __m128i test_mm_set1_epi64x(long long A) { // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 return _mm_set1_epi64x(A); } +TEST_CONSTEXPR(match_v2di(_mm_set1_epi64x(65536), 65536, 65536)); __m128d test_mm_set1_pd(double A) { // CHECK-LABEL: test_mm_set1_pd @@ -1163,6 +1173,7 @@ __m128i test_mm_setr_epi8(char A, char B, char C, char D, // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15 return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P); } +TEST_CONSTEXPR(match_v16qi(_mm_setr_epi8(0, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); __m128i test_mm_setr_epi16(short A, short B, short C, short D, short E, short F, short G, short H) { @@ -1177,6 +1188,7 @@ __m128i test_mm_setr_epi16(short A, short B, short C, short D, // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7 return _mm_setr_epi16(A, B, C, D, E, F, G, H); } +TEST_CONSTEXPR(match_v8hi(_mm_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7), 0, -1, -2, -3, -4, -5, -6, -7)); __m128i test_mm_setr_epi32(int A, int B, int C, int D) { // CHECK-LABEL: test_mm_setr_epi32 @@ -1186,6 +1198,7 @@ __m128i test_mm_setr_epi32(int A, int B, int C, int D) { // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3 return _mm_setr_epi32(A, B, C, D); } +TEST_CONSTEXPR(match_v4si(_mm_setr_epi32(1, -3, 5, -7), 1, -3, 5, -7)); __m128i test_mm_setr_epi64(__m64 A, __m64 B) { // CHECK-LABEL: test_mm_setr_epi64 @@ -1193,6 +1206,7 @@ __m128i test_mm_setr_epi64(__m64 A, __m64 B) { // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 return _mm_setr_epi64(A, B); } +TEST_CONSTEXPR(match_v2di(_mm_setr_epi64((__m64){-1}, (__m64){42}), -1, 42)); __m128d test_mm_setr_pd(double A, double B) { // CHECK-LABEL: test_mm_setr_pd From a9c417c28a25c153aa0fdbe2eb5453a93820a3b1 Mon Sep 17 00:00:00 2001 From: Hugo Trachino Date: Tue, 29 Oct 2024 15:47:13 +0000 Subject: [PATCH 309/425] [MLIR][SCF] Fix LoopPeelOp documentation (NFC) (#113179) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As an example, I added annotations to the peel_front unit test. 
``` func.func @loop_peel_first_iter_op() { // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[C41:.+]] = arith.constant 41 // CHECK: %[[C5:.+]] = arith.constant 5 // CHECK: %[[C5_0:.+]] = arith.constant 5 // CHECK: scf.for %{{.+}} = %[[C0]] to %[[C5_0]] step %[[C5]] // CHECK: arith.addi // CHECK: scf.for %{{.+}} = %[[C5_0]] to %[[C41]] step %[[C5]] // CHECK: arith.addi %0 = arith.constant 0 : index %1 = arith.constant 41 : index %2 = arith.constant 5 : index scf.for %i = %0 to %1 step %2 { arith.addi %i, %i : index } return } module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["arith.addi"]} in %arg1 : (!transform.any_op) -> !transform.any_op %1 = transform.get_parent_op %0 {op_name = "scf.for"} : (!transform.any_op) -> !transform.op<"scf.for"> %main_loop, %remainder = transform.loop.peel %1 {peel_front = true} : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">, !transform.op<"scf.for">) transform.annotate %main_loop "main_loop" : !transform.op<"scf.for"> transform.annotate %remainder "remainder" : !transform.op<"scf.for"> transform.yield } } ``` Gives : ``` func.func @loop_peel_first_iter_op() { %c0 = arith.constant 0 : index %c41 = arith.constant 41 : index %c5 = arith.constant 5 : index %c5_0 = arith.constant 5 : index scf.for %arg0 = %c0 to %c5_0 step %c5 { %0 = arith.addi %arg0, %arg0 : index } {remainder} // The first iteration loop (second result) has been annotated remainder scf.for %arg0 = %c5_0 to %c41 step %c5 { %0 = arith.addi %arg0, %arg0 : index } {main_loop} // The main loop (first result) has been annotated main_loop return } ``` --------- Co-authored-by: Andrzej Warzyński --- .../SCF/TransformOps/SCFTransformOps.td | 28 ++++++++++--------- .../SCF/Transforms/LoopSpecialization.cpp | 11 ++++---- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git 
a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td index 20880d94a83cacb..5dba8c5e57ba861 100644 --- a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td +++ b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td @@ -146,7 +146,7 @@ def LoopPeelOp : Op Date: Tue, 29 Oct 2024 09:25:51 -0700 Subject: [PATCH 310/425] [SandboxIR] Add callbacks for instruction insert/remove/move ops (#112965) --- llvm/include/llvm/SandboxIR/Context.h | 66 ++++++++++++++- llvm/lib/SandboxIR/Context.cpp | 73 ++++++++++++++-- llvm/lib/SandboxIR/Instruction.cpp | 3 + llvm/unittests/SandboxIR/SandboxIRTest.cpp | 98 ++++++++++++++++++++++ 4 files changed, 233 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/SandboxIR/Context.h b/llvm/include/llvm/SandboxIR/Context.h index 1285598a1c02822..f2056de87cb946c 100644 --- a/llvm/include/llvm/SandboxIR/Context.h +++ b/llvm/include/llvm/SandboxIR/Context.h @@ -9,18 +9,39 @@ #ifndef LLVM_SANDBOXIR_CONTEXT_H #define LLVM_SANDBOXIR_CONTEXT_H +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/LLVMContext.h" #include "llvm/SandboxIR/Tracker.h" #include "llvm/SandboxIR/Type.h" +#include + namespace llvm::sandboxir { -class Module; -class Value; class Argument; +class BBIterator; class Constant; +class Module; +class Value; class Context { +public: + // A EraseInstrCallback receives the instruction about to be erased. + using EraseInstrCallback = std::function; + // A CreateInstrCallback receives the instruction about to be created. + using CreateInstrCallback = std::function; + // A MoveInstrCallback receives the instruction about to be moved, the + // destination BB and an iterator pointing to the insertion position. + using MoveInstrCallback = + std::function; + + /// An ID for a registered callback. Used for deregistration. 
Using a 64-bit + /// integer so we don't have to worry about the unlikely case of overflowing + /// a 32-bit counter. + using CallbackID = uint64_t; + protected: LLVMContext &LLVMCtx; friend class Type; // For LLVMCtx. @@ -48,6 +69,21 @@ class Context { /// Type objects. DenseMap> LLVMTypeToTypeMap; + /// Callbacks called when an IR instruction is about to get erased. Keys are + /// used as IDs for deregistration. + MapVector EraseInstrCallbacks; + /// Callbacks called when an IR instruction has just been created. Keys are + /// used as IDs for deregistration. + MapVector CreateInstrCallbacks; + /// Callbacks called when an IR instruction is about to get moved. Keys are + /// used as IDs for deregistration. + MapVector MoveInstrCallbacks; + + /// A counter used for assigning callback IDs during registration. The same + /// counter is used for all kinds of callbacks so we can detect mismatched + /// registration/deregistration. + CallbackID NextCallbackID = 0; + /// Remove \p V from the maps and returns the unique_ptr. std::unique_ptr detachLLVMValue(llvm::Value *V); /// Remove \p SBV from all SandboxIR maps and stop owning it. This effectively @@ -70,6 +106,10 @@ class Context { Constant *getOrCreateConstant(llvm::Constant *LLVMC); friend class Utils; // For getMemoryBase + void runEraseInstrCallbacks(Instruction *I); + void runCreateInstrCallbacks(Instruction *I); + void runMoveInstrCallbacks(Instruction *I, const BBIterator &Where); + // Friends for getOrCreateConstant(). #define DEF_CONST(ID, CLASS) friend class CLASS; #include "llvm/SandboxIR/Values.def" @@ -198,6 +238,28 @@ class Context { /// \Returns the number of values registered with Context. size_t getNumValues() const { return LLVMValueToValueMap.size(); } + + /// Register a callback that gets called when a SandboxIR instruction is about + /// to be removed from its parent. Note that this will also be called when + /// reverting the creation of an instruction.
+ /// \Returns a callback ID for later deregistration. + CallbackID registerEraseInstrCallback(EraseInstrCallback CB); + void unregisterEraseInstrCallback(CallbackID ID); + + /// Register a callback that gets called right after a SandboxIR instruction + /// is created. Note that this will also be called when reverting the removal + /// of an instruction. + /// \Returns a callback ID for later deregistration. + CallbackID registerCreateInstrCallback(CreateInstrCallback CB); + void unregisterCreateInstrCallback(CallbackID ID); + + /// Register a callback that gets called when a SandboxIR instruction is about + /// to be moved. Note that this will also be called when reverting a move. + /// \Returns a callback ID for later deregistration. + CallbackID registerMoveInstrCallback(MoveInstrCallback CB); + void unregisterMoveInstrCallback(CallbackID ID); + + // TODO: Add callbacks for instructions inserted/removed if needed. }; } // namespace llvm::sandboxir diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp index 486e935bc35fba8..5e5cbbbc4515d23 100644 --- a/llvm/lib/SandboxIR/Context.cpp +++ b/llvm/lib/SandboxIR/Context.cpp @@ -35,17 +35,20 @@ Value *Context::registerValue(std::unique_ptr &&VPtr) { assert(VPtr->getSubclassID() != Value::ClassID::User && "Can't register a user!"); + Value *V = VPtr.get(); + [[maybe_unused]] auto Pair = + LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)}); + assert(Pair.second && "Already exists!"); + // Track creation of instructions. // Please note that we don't allow the creation of detached instructions, // meaning that the instructions need to be inserted into a block upon // creation. This is why the tracker class combines creation and insertion. 
- if (auto *I = dyn_cast(VPtr.get())) + if (auto *I = dyn_cast(V)) { getTracker().emplaceIfTracking(I); + runCreateInstrCallbacks(I); + } - Value *V = VPtr.get(); - [[maybe_unused]] auto Pair = - LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)}); - assert(Pair.second && "Already exists!"); return V; } @@ -660,4 +663,64 @@ Module *Context::createModule(llvm::Module *LLVMM) { return M; } +void Context::runEraseInstrCallbacks(Instruction *I) { + for (const auto &CBEntry : EraseInstrCallbacks) + CBEntry.second(I); +} + +void Context::runCreateInstrCallbacks(Instruction *I) { + for (auto &CBEntry : CreateInstrCallbacks) + CBEntry.second(I); +} + +void Context::runMoveInstrCallbacks(Instruction *I, const BBIterator &WhereIt) { + for (auto &CBEntry : MoveInstrCallbacks) + CBEntry.second(I, WhereIt); +} + +// An arbitrary limit, to check for accidental misuse. We expect a small number +// of callbacks to be registered at a time, but we can increase this number if +// we discover we need more.
+static constexpr int MaxRegisteredCallbacks = 16; + +Context::CallbackID Context::registerEraseInstrCallback(EraseInstrCallback CB) { + assert(EraseInstrCallbacks.size() <= MaxRegisteredCallbacks && + "EraseInstrCallbacks size limit exceeded"); + CallbackID ID = NextCallbackID++; + EraseInstrCallbacks[ID] = CB; + return ID; +} +void Context::unregisterEraseInstrCallback(CallbackID ID) { + [[maybe_unused]] bool Erased = EraseInstrCallbacks.erase(ID); + assert(Erased && + "Callback ID not found in EraseInstrCallbacks during deregistration"); +} + +Context::CallbackID +Context::registerCreateInstrCallback(CreateInstrCallback CB) { + assert(CreateInstrCallbacks.size() <= MaxRegisteredCallbacks && + "CreateInstrCallbacks size limit exceeded"); + CallbackID ID = NextCallbackID++; + CreateInstrCallbacks[ID] = CB; + return ID; +} +void Context::unregisterCreateInstrCallback(CallbackID ID) { + [[maybe_unused]] bool Erased = CreateInstrCallbacks.erase(ID); + assert(Erased && + "Callback ID not found in CreateInstrCallbacks during deregistration"); +} + +Context::CallbackID Context::registerMoveInstrCallback(MoveInstrCallback CB) { + assert(MoveInstrCallbacks.size() <= MaxRegisteredCallbacks && + "MoveInstrCallbacks size limit exceeded"); + CallbackID ID = NextCallbackID++; + MoveInstrCallbacks[ID] = CB; + return ID; +} +void Context::unregisterMoveInstrCallback(CallbackID ID) { + [[maybe_unused]] bool Erased = MoveInstrCallbacks.erase(ID); + assert(Erased && + "Callback ID not found in MoveInstrCallbacks during deregistration"); +} + } // namespace llvm::sandboxir diff --git a/llvm/lib/SandboxIR/Instruction.cpp b/llvm/lib/SandboxIR/Instruction.cpp index d80d10370e32d8e..096b827541eeafa 100644 --- a/llvm/lib/SandboxIR/Instruction.cpp +++ b/llvm/lib/SandboxIR/Instruction.cpp @@ -73,6 +73,8 @@ void Instruction::removeFromParent() { void Instruction::eraseFromParent() { assert(users().empty() && "Still connected to users, can't erase!"); + + Ctx.runEraseInstrCallbacks(this); 
std::unique_ptr Detached = Ctx.detach(this); auto LLVMInstrs = getLLVMInstrs(); @@ -100,6 +102,7 @@ void Instruction::moveBefore(BasicBlock &BB, const BBIterator &WhereIt) { // Destination is same as origin, nothing to do. return; + Ctx.runMoveInstrCallbacks(this, WhereIt); Ctx.getTracker().emplaceIfTracking(this); auto *LLVMBB = cast(BB.Val); diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 97113b303f72e5e..99e14292a91b927 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -22,6 +22,7 @@ #include "llvm/SandboxIR/Value.h" #include "llvm/Support/SourceMgr.h" #include "gmock/gmock-matchers.h" +#include "gmock/gmock-more-matchers.h" #include "gtest/gtest.h" using namespace llvm; @@ -5962,3 +5963,100 @@ TEST_F(SandboxIRTest, CheckClassof) { EXPECT_NE(&sandboxir::CLASS::classof, &sandboxir::Instruction::classof); #include "llvm/SandboxIR/Values.def" } + +TEST_F(SandboxIRTest, InstructionCallbacks) { + parseIR(C, R"IR( + define void @foo(ptr %ptr, i8 %val) { + ret void + } + )IR"); + Function &LLVMF = *M->getFunction("foo"); + sandboxir::Context Ctx(C); + + auto &F = *Ctx.createFunction(&LLVMF); + auto &BB = *F.begin(); + sandboxir::Argument *Ptr = F.getArg(0); + sandboxir::Argument *Val = F.getArg(1); + sandboxir::Instruction *Ret = &BB.front(); + + SmallVector Inserted; + auto InsertCbId = Ctx.registerCreateInstrCallback( + [&Inserted](sandboxir::Instruction *I) { Inserted.push_back(I); }); + + SmallVector Removed; + auto RemoveCbId = Ctx.registerEraseInstrCallback( + [&Removed](sandboxir::Instruction *I) { Removed.push_back(I); }); + + // Keep the moved instruction and the instruction pointed by the Where + // iterator so we can check both callback arguments work as expected. 
+ SmallVector> + Moved; + auto MoveCbId = Ctx.registerMoveInstrCallback( + [&Moved](sandboxir::Instruction *I, const sandboxir::BBIterator &Where) { + // Use a nullptr to signal "move to end" to keep it simple. We only + // have one basic block in this test case anyway. + if (Where == Where.getNodeParent()->end()) + Moved.push_back(std::make_pair(I, nullptr)); + else + Moved.push_back(std::make_pair(I, &*Where)); + }); + + // Two more insertion callbacks, to check that they're called in registration + // order. + SmallVector Order; + auto CheckOrderInsertCbId1 = Ctx.registerCreateInstrCallback( + [&Order](sandboxir::Instruction *I) { Order.push_back(1); }); + + auto CheckOrderInsertCbId2 = Ctx.registerCreateInstrCallback( + [&Order](sandboxir::Instruction *I) { Order.push_back(2); }); + + Ctx.save(); + auto *NewI = sandboxir::StoreInst::create(Val, Ptr, /*Align=*/std::nullopt, + Ret->getIterator(), Ctx); + EXPECT_THAT(Inserted, testing::ElementsAre(NewI)); + EXPECT_THAT(Removed, testing::IsEmpty()); + EXPECT_THAT(Moved, testing::IsEmpty()); + EXPECT_THAT(Order, testing::ElementsAre(1, 2)); + + Ret->moveBefore(NewI); + EXPECT_THAT(Inserted, testing::ElementsAre(NewI)); + EXPECT_THAT(Removed, testing::IsEmpty()); + EXPECT_THAT(Moved, testing::ElementsAre(std::make_pair(Ret, NewI))); + + Ret->eraseFromParent(); + EXPECT_THAT(Inserted, testing::ElementsAre(NewI)); + EXPECT_THAT(Removed, testing::ElementsAre(Ret)); + EXPECT_THAT(Moved, testing::ElementsAre(std::make_pair(Ret, NewI))); + + NewI->eraseFromParent(); + EXPECT_THAT(Inserted, testing::ElementsAre(NewI)); + EXPECT_THAT(Removed, testing::ElementsAre(Ret, NewI)); + EXPECT_THAT(Moved, testing::ElementsAre(std::make_pair(Ret, NewI))); + + // Check that after revert the callbacks have been called for the inverse + // operations of the changes made so far.
+ Ctx.revert(); + EXPECT_THAT(Inserted, testing::ElementsAre(NewI, NewI, Ret)); + EXPECT_THAT(Removed, testing::ElementsAre(Ret, NewI, NewI)); + EXPECT_THAT(Moved, testing::ElementsAre(std::make_pair(Ret, NewI), + std::make_pair(Ret, nullptr))); + EXPECT_THAT(Order, testing::ElementsAre(1, 2, 1, 2, 1, 2)); + + // Check that deregistration works. Do an operation of each type after + // deregistering callbacks and check. + Inserted.clear(); + Removed.clear(); + Moved.clear(); + Ctx.unregisterCreateInstrCallback(InsertCbId); + Ctx.unregisterEraseInstrCallback(RemoveCbId); + Ctx.unregisterMoveInstrCallback(MoveCbId); + Ctx.unregisterCreateInstrCallback(CheckOrderInsertCbId1); + Ctx.unregisterCreateInstrCallback(CheckOrderInsertCbId2); + auto *NewI2 = sandboxir::StoreInst::create(Val, Ptr, /*Align=*/std::nullopt, + Ret->getIterator(), Ctx); + Ret->moveBefore(NewI2); + Ret->eraseFromParent(); + EXPECT_THAT(Inserted, testing::IsEmpty()); + EXPECT_THAT(Removed, testing::IsEmpty()); + EXPECT_THAT(Moved, testing::IsEmpty()); +} From 318bdd0aeb721c8e9bd67101ac6641e5f9d990f2 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Oct 2024 09:26:47 -0700 Subject: [PATCH 311/425] [StackSafetyAnalysis] Bail out when calling ifunc An assertion failure arises when a call instruction calls a GlobalIFunc. Since we cannot reason about the underlying function, just bail out. 
Fix #87923 Pull Request: https://github.com/llvm/llvm-project/pull/113841 --- llvm/lib/Analysis/StackSafetyAnalysis.cpp | 2 +- llvm/test/Analysis/StackSafetyAnalysis/local.ll | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp index 27360d0e84cb2b8..5d81658409dae85 100644 --- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp +++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp @@ -528,7 +528,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr, // dso_preemptable aliases or aliases with interposable linkage. const GlobalValue *Callee = dyn_cast(CB.getCalledOperand()->stripPointerCasts()); - if (!Callee) { + if (!Callee || isa(Callee)) { US.addRange(I, UnknownRange, /*IsSafe=*/false); break; } diff --git a/llvm/test/Analysis/StackSafetyAnalysis/local.ll b/llvm/test/Analysis/StackSafetyAnalysis/local.ll index 4a833611c78916e..02d46c8449bae53 100644 --- a/llvm/test/Analysis/StackSafetyAnalysis/local.ll +++ b/llvm/test/Analysis/StackSafetyAnalysis/local.ll @@ -1120,5 +1120,21 @@ define void @NonPointer(ptr %p) { ret void } +@ifunc = dso_local ifunc i64 (ptr), ptr @ifunc_resolver + +define dso_local void @CallIfunc(ptr noundef %uaddr) local_unnamed_addr { +; CHECK-LABEL: @CallIfunc +; CHECK-NEXT: args uses: +; CHECK-NEXT: uaddr[]: full-set +entry: + tail call i64 @ifunc(ptr noundef %uaddr) + ret void +} + +define dso_local ptr @ifunc_resolver() { +entry: + ret ptr null +} + declare void @llvm.lifetime.start.p0(i64, ptr nocapture) declare void @llvm.lifetime.end.p0(i64, ptr nocapture) From 2ab98dfe19ac384f0cfac1a1fafc56b9dd7ad9b7 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Oct 2024 09:45:23 -0700 Subject: [PATCH 312/425] [lldb] Update link to GreenDragon in the docs --- lldb/docs/resources/test.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/docs/resources/test.rst b/lldb/docs/resources/test.rst index 
715d3772fe2787c..5f1bd0d5738305e 100644 --- a/lldb/docs/resources/test.rst +++ b/lldb/docs/resources/test.rst @@ -418,8 +418,8 @@ An overview of all LLDB builders can be found here: `https://lab.llvm.org/buildbot/#/builders?tags=lldb `_ Building and testing for macOS uses a different platform called GreenDragon. It -has a dedicated tab for LLDB: `https://green.lab.llvm.org/green/view/LLDB/ -`_ +has a dedicated tab for LLDB: `https://green.lab.llvm.org/job/llvm.org/view/LLDB/ +`_ Running The Tests From 2a9dd8af5ad9783d8ecba6bf93521de64bab6f81 Mon Sep 17 00:00:00 2001 From: SpencerAbson Date: Tue, 29 Oct 2024 16:55:19 +0000 Subject: [PATCH 313/425] [AArch64] Add assembly/disassembly for zeroing SVE FCVT{X} and BFCVT (#113916) This patch adds assembly/disassembly support for the following SVE2.2 instructions - FCVT (zeroing) - FCVTX (zeroing) - BFCVT (zeroing) In accordance with: https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 7 +++ llvm/lib/Target/AArch64/SVEInstrFormats.td | 9 +++ llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s | 2 +- llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s | 2 +- .../MC/AArch64/SVE2p2/bfcvt_z-diagnostics.s | 60 +++++++++++++++++++ llvm/test/MC/AArch64/SVE2p2/bfcvt_z.s | 33 ++++++++++ .../MC/AArch64/SVE2p2/fcvt_z-diagnostics.s | 50 ++++++++++++++++ llvm/test/MC/AArch64/SVE2p2/fcvt_z.s | 57 ++++++++++++++++++ .../MC/AArch64/SVE2p2/fcvtx_z-diagnostics.s | 57 ++++++++++++++++++ llvm/test/MC/AArch64/SVE2p2/fcvtx_z.s | 33 ++++++++++ 10 files changed, 308 insertions(+), 2 deletions(-) create mode 100644 llvm/test/MC/AArch64/SVE2p2/bfcvt_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/bfcvt_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/fcvt_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/fcvt_z.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/fcvtx_z-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/fcvtx_z.s diff --git 
a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 4f101d0d46b7afa..5c5ae898a8ac022 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -4225,15 +4225,22 @@ defm TBLQ_ZZZ : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>; // SME2.2 or SVE2.2 instructions //===----------------------------------------------------------------------===// let Predicates = [HasSVE2p2orSME2p2] in { + // SVE Floating-point convert precision, zeroing predicate + defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt">; + // SVE2p2 floating-point convert precision down (placing odd), zeroing predicate defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">; def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>; + // Placing even + def FCVTX_ZPzZ_DtoS : sve_fp_z2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32>; // SVE2p2 floating-point convert precision up, zeroing predicate defm FCVTLT_ZPzZ : sve_fp_fcvtltz<"fcvtlt">; // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>; + // Placing corresponding + def BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd<0b1001010, "bfcvt", ZPR32, ZPR16>; // Floating-point convert to integer, zeroing predicate defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs">; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index d1ceb30f36dcdc9..88a0983aa1480d2 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -3207,6 +3207,15 @@ multiclass sve_fp_z2op_p_zd_d_flogb { def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>; } +multiclass sve_fp_z2op_p_zd_b_0 { + def _StoH : sve_fp_z2op_p_zd<0b1001000, asm, ZPR32, ZPR16>; + def _HtoS : sve_fp_z2op_p_zd<0b1001001, asm, ZPR16, ZPR32>; + def _DtoH : sve_fp_z2op_p_zd<0b1101000, asm, ZPR64, ZPR16>; + def _HtoD : sve_fp_z2op_p_zd<0b1101001, asm, 
ZPR16, ZPR64>; + def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>; + def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>; +} + //===----------------------------------------------------------------------===// // SVE Integer Arithmetic - Binary Predicated Group //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s b/llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s index 013f15f8b6e0050..6c55ebe4088ff11 100644 --- a/llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s @@ -11,7 +11,7 @@ bfcvt z0.h, p0/m, z1.h // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: bfcvt z0.h, p0/z, z1.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: bfcvt z0.h, p0/z, z1.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s index 5f365496033633a..36c5d5fe9cbea6a 100644 --- a/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s @@ -29,7 +29,7 @@ fcvtx z0.d, p0/m, z0.d // Invalid predicate operation fcvtx z0.s, p0/z, z0.d -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: fcvtx z0.s, p0/z, z0.d // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/bfcvt_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/bfcvt_z-diagnostics.s new file mode 100644 index 000000000000000..30be5d19c4aae76 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/bfcvt_z-diagnostics.s @@ -0,0 +1,60 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid operand + +bfcvt z0.b, 
p0/z, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfcvt z0.b, p0/z, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid element width + +bfcvt z0.h, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvt z0.h, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvt z0.h, p0/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvt z0.h, p0/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvt z0.s, p0/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvt z0.s, p0/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvt z0.s, p0/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvt z0.s, p0/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfcvt z0.d, p0/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: bfcvt z0.d, p0/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Predicate not in restricted predicate range + +bfcvt z0.h, p8/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bfcvt z0.h, p8/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.s, p0/m, z7.s +bfcvt z0.h, p7/z, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: bfcvt z0.h, p7/z, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +bfcvt z0.h, p7/z, z1.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable 
when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: bfcvt z0.h, p7/z, z1.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/bfcvt_z.s b/llvm/test/MC/AArch64/SVE2p2/bfcvt_z.s new file mode 100644 index 000000000000000..9d63ebf1e830985 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/bfcvt_z.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +bfcvt z0.h, p0/z, z0.s // 01100100-10011010-11000000-00000000 +// CHECK-INST: bfcvt z0.h, p0/z, z0.s +// CHECK-ENCODING: [0x00,0xc0,0x9a,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 649ac000 + +bfcvt z21.h, p5/z, z10.s // 01100100-10011010-11010101-01010101 +// CHECK-INST: bfcvt z21.h, p5/z, z10.s +// CHECK-ENCODING: [0x55,0xd5,0x9a,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 649ad555 + +bfcvt z31.h, p7/z, z31.s // 01100100-10011010-11011111-11111111 +// CHECK-INST: bfcvt z31.h, p7/z, z31.s +// CHECK-ENCODING: [0xff,0xdf,0x9a,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 649adfff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvt_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/fcvt_z-diagnostics.s new file mode 100644 index 000000000000000..37f4a0ffbe6a2af --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/fcvt_z-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid operand + +fcvt z0.b, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvt z0.b, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid element width + +fcvt z0.h, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvt z0.h, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvt z0.s, p0/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// 
CHECK-NEXT: fcvt z0.s, p0/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvt z0.d, p0/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvt z0.d, p0/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Predicate not in restricted predicate range + +fcvt z0.s, p8/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: fcvt z0.s, p8/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.s, p0/m, z7.s +fcvt z0.s, p7/z, z1.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fcvt z0.s, p7/z, z1.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +fcvt z0.s, p7/z, z1.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fcvt z0.s, p7/z, z1.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvt_z.s b/llvm/test/MC/AArch64/SVE2p2/fcvt_z.s new file mode 100644 index 000000000000000..6cd9f1ba503210e --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/fcvt_z.s @@ -0,0 +1,57 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s 
--check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// convert to half + +fcvt z0.h, p0/z, z0.s // 01100100-10011010-10000000-00000000 +// CHECK-INST: fcvt z0.h, p0/z, z0.s +// CHECK-ENCODING: [0x00,0x80,0x9a,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 649a8000 + +fcvt z23.h, p3/z, z13.d // 01100100-11011010-10001101-10110111 +// CHECK-INST: fcvt z23.h, p3/z, z13.d +// CHECK-ENCODING: [0xb7,0x8d,0xda,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64da8db7 + +// convert to single + +fcvt z0.s, p0/z, z0.h // 01100100-10011010-10100000-00000000 +// CHECK-INST: fcvt z0.s, p0/z, z0.h +// CHECK-ENCODING: [0x00,0xa0,0x9a,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 649aa000 + +fcvt z31.s, p7/z, z31.d // 01100100-11011010-11011111-11111111 +// CHECK-INST: fcvt z31.s, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xdf,0xda,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dadfff + +// convert to double + +fcvt z21.d, p5/z, z10.h // 01100100-11011010-10110101-01010101 +// CHECK-INST: fcvt z21.d, p5/z, z10.h +// CHECK-ENCODING: [0x55,0xb5,0xda,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64dab555 + +fcvt z31.d, p7/z, z31.s // 01100100-11011010-11111111-11111111 +// CHECK-INST: fcvt z31.d, p7/z, z31.s +// CHECK-ENCODING: [0xff,0xff,0xda,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 64daffff &1 < %s| 
FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid element width + +fcvtx z0.b, p0/z, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtx z0.b, p0/z, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtx z0.h, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtx z0.h, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtx z0.s, p0/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtx z0.s, p0/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtx z0.d, p0/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtx z0.d, p0/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtx z0.h, p0/z, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtx z0.h, p0/z, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtx z0.b, p0/z, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fcvtx z0.b, p0/z, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Predicate not in restricted predicate range + +fcvtx z0.s, p8/z, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: fcvtx z0.s, p8/z, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z0.s, p0/m, z7.s +fcvtx z0.s, p7/z, z1.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fcvtx z0.s, p7/z, z1.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z0, z7 +fcvtx z0.s, p7/z, z1.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction 
is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: fcvtx z0.s, p7/z, z1.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtx_z.s b/llvm/test/MC/AArch64/SVE2p2/fcvtx_z.s new file mode 100644 index 000000000000000..e5e2155ea5d8785 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/fcvtx_z.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +fcvtx z0.s, p0/z, z0.d // 01100100-00011010-11000000-00000000 +// CHECK-INST: fcvtx z0.s, p0/z, z0.d +// CHECK-ENCODING: [0x00,0xc0,0x1a,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 641ac000 + +fcvtx z23.s, p3/z, z13.d // 01100100-00011010-11001101-10110111 +// CHECK-INST: fcvtx z23.s, p3/z, z13.d +// CHECK-ENCODING: [0xb7,0xcd,0x1a,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 641acdb7 + +fcvtx z31.s, p7/z, z31.d // 01100100-00011010-11011111-11111111 +// CHECK-INST: fcvtx z31.s, p7/z, z31.d +// CHECK-ENCODING: [0xff,0xdf,0x1a,0x64] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 641adfff From 39ad84e4d173b43dcd13209dc7c62de7a0476c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Tue, 29 Oct 2024 16:57:23 +0000 Subject: [PATCH 314/425] [mlir][linalg] Split GenericPadOpVectorizationPattern into two patterns (#111349) At the moment, `GenericPadOpVectorizationPattern` implements two orthogonal transformations: 1. Rewrites `tensor::PadOp` into a sequence of `tensor::EmptyOp`, `linalg::FillOp` and `tensor::InsertSliceOp`. 2. Vectorizes (where possible) `tensor::InsertSliceOp` (see `tryVectorizeCopy`). This patch splits `GenericPadOpVectorizationPattern` into two separate patterns: 1. `GeneralizePadOpPattern` for the first transformation (note that currently `GenericPadOpVectorizationPattern` inherits from `GeneralizePadOpPattern`). 2. `InsertSliceVectorizePattern` to vectorize `tensor::InsertSliceOp`. 
With this change, we gain the following: * a clear separation between pre-processing and vectorization transformations/stages, * a path to support masked vectorisation for `tensor.insert_slice` (with a dedicated pattern for vectorization, it is much easier to specify the input vector sizes used in masking), * more opportunities to vectorize `tensor.insert_slice`. Note for downstream users: -------------------------- If you were using `populatePadOpVectorizationPatterns`, following this change you will also have to add `populateInsertSliceVectorizationPatterns`. Finer implementation details: ----------------------------- 1. The majority of changes in this patch are copy & paste + some edits. 1.1. The only functional change is that the vectorization of `tensor.insert_slice` is now broadly available (as opposed to being constrained to the pad vectorization pattern: `GenericPadOpVectorizationPattern`). 1.2. Following-on from the above, `@pad_and_insert_slice_dest` is updated. As expected, the input `tensor.insert_slice` Op is no longer "preserved" and instead gets vectorized successfully. 2. The `linalg.fill` case in `getConstantPadVal` works under the assumption that only _scalar_ source values can be used. That's consistent with the definition of the Op, but it's not tested at the moment. Hence a test case in Linalg/invalid.mlir is added. 3. The behaviour of the two TD vectorization Ops, `transform.structured.vectorize_children_and_apply_patterns` and `transform.structured.vectorize` is preserved. 
--- .../Dialect/Linalg/Transforms/Transforms.h | 14 +- .../TransformOps/LinalgTransformOps.cpp | 4 + .../Dialect/Linalg/Transforms/Transforms.cpp | 7 +- .../Linalg/Transforms/Vectorization.cpp | 279 +++++++++++------- mlir/test/Dialect/Linalg/invalid.mlir | 9 + .../Linalg/vectorization-pad-patterns.mlir | 11 +- .../Linalg/vectorization-unsupported.mlir | 29 +- .../Linalg/vectorization-with-patterns.mlir | 115 +++++++- 8 files changed, 327 insertions(+), 141 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 70b086641bdc18d..b5710bd78f00898 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -1503,18 +1503,13 @@ using OptimizeCopyFn = /// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and /// InsertSliceOp. For now, only constant padding values are supported. -/// `OptimizeCopyFn` can be used to customize copying step optimization. struct GeneralizePadOpPattern : public OpRewritePattern { - GeneralizePadOpPattern(MLIRContext *context, - OptimizeCopyFn optimizeCopyFn = nullptr, - PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - optimizeCopyFn(std::move(optimizeCopyFn)) {} + GeneralizePadOpPattern(MLIRContext *context, PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit) {} LogicalResult matchAndRewrite(tensor::PadOp padOp, PatternRewriter &rewriter) const override; protected: - OptimizeCopyFn optimizeCopyFn; Value createFillOrGenerateOp(RewriterBase &rewriter, tensor::PadOp padOp, Value dest, const SmallVector &dynSizes) const; @@ -1663,6 +1658,11 @@ void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns, /// \see rewriteInIm2Col for more details. void populateConvertConv2DToImg2ColPatterns(RewritePatternSet &patterns); +/// Populates `patterns` with vectorisation patterns for tensor.insert_slice. 
+/// TODO: Avoid having a dedicated `populate{}` for one pattern. Instead, either +/// expand or merge with other `populate{}`. +void populateInsertSliceVectorizationPatterns(RewritePatternSet &patterns); + /// Populates `patterns` with patterns that vectorize tensor.pad. /// These patterns are meant to apply in a complementary fashion. Benefits /// are used to encode a certain ordering of pattern application. To avoid diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 3d3f0a93a3829bf..9c0ab4f41b855a2 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -256,6 +256,7 @@ void transform::ApplyFoldAddIntoDestPatternsOp::populatePatterns( void transform::ApplyPadVectorizationPatternsOp::populatePatterns( RewritePatternSet &patterns) { linalg::populatePadOpVectorizationPatterns(patterns); + linalg::populateInsertSliceVectorizationPatterns(patterns); } //===----------------------------------------------------------------------===// @@ -3482,6 +3483,9 @@ transform::VectorizeChildrenAndApplyPatternsOp::applyToOne( patterns.add(ctx); + // Add misc. vectorization patterns (e.g. for tensor.insert_slice) + linalg::populateInsertSliceVectorizationPatterns(patterns); + if (getVectorizePadding()) linalg::populatePadOpVectorizationPatterns(patterns); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp index 0fe096863d7b016..da5233049aaf69e 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp @@ -973,12 +973,7 @@ GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp, padOp.getLoc(), staticSizes, resultType.getElementType(), dynSizes); Value fill = createFillOrGenerateOp(rewriter, padOp, emptyTensor, dynSizes); - // Try optimize the copy of source. 
- if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded()) - return success(); - - // tensor::PadOps cannot be optimized. Generate a InsertSliceOp instead - // for copying the PadOp source. + // Generate a InsertSliceOp for copying the PadOp source. auto sourceType = padOp.getSourceType(); // Compute size of source of tensor::PadOp. SmallVector srcSizes = diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 0a2457176a1d474..090e0b46768d7e9 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -2281,115 +2281,6 @@ LogicalResult mlir::linalg::vectorizeCopy(RewriterBase &rewriter, //----------------------------------------------------------------------------// // Misc. vectorization patterns. //----------------------------------------------------------------------------// - -/// Helper function that retrieves the value of an IntegerAttr. -static int64_t getIntFromAttr(Attribute attr) { - return cast(attr).getInt(); -} - -/// Given an ArrayRef of OpFoldResults, return a vector of Values. -/// IntegerAttrs are converted to ConstantIndexOps. Other attribute types are -/// not supported. -static SmallVector ofrToIndexValues(RewriterBase &rewriter, Location loc, - ArrayRef ofrs) { - SmallVector result; - for (auto o : ofrs) { - if (auto val = llvm::dyn_cast_if_present(o)) { - result.push_back(val); - } else { - result.push_back(rewriter.create( - loc, getIntFromAttr(o.template get()))); - } - } - return result; -} - -/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and -/// InsertSliceOp. For now, only constant padding values are supported. -/// If there is enough static type information, TransferReadOps and -/// TransferWriteOps may be generated instead of InsertSliceOps. 
-struct GenericPadOpVectorizationPattern : public GeneralizePadOpPattern { - GenericPadOpVectorizationPattern(MLIRContext *context, - PatternBenefit benefit = 1) - : GeneralizePadOpPattern(context, tryVectorizeCopy, benefit) {} - /// Vectorize the copying of a tensor::PadOp's source. This is possible if - /// each dimension size is statically know in the source type or the result - /// type (or both). - static LogicalResult tryVectorizeCopy(RewriterBase &rewriter, - tensor::PadOp padOp, Value dest) { - auto sourceType = padOp.getSourceType(); - auto resultType = padOp.getResultType(); - if (!VectorType::isValidElementType(sourceType.getElementType())) - return failure(); - - // Copy cannot be vectorized if pad value is non-constant and source shape - // is dynamic. In case of a dynamic source shape, padding must be appended - // by TransferReadOp, but TransferReadOp supports only constant padding. - auto padValue = padOp.getConstantPaddingValue(); - if (!padValue) { - if (!sourceType.hasStaticShape()) - return failure(); - // Create dummy padding value. - auto elemType = sourceType.getElementType(); - padValue = rewriter.create( - padOp.getLoc(), elemType, rewriter.getZeroAttr(elemType)); - } - - SmallVector vecShape; - SmallVector readInBounds; - SmallVector writeInBounds; - for (unsigned i = 0; i < sourceType.getRank(); ++i) { - if (!sourceType.isDynamicDim(i)) { - vecShape.push_back(sourceType.getDimSize(i)); - // Source shape is statically known: Neither read nor write are - // out-of- bounds. - readInBounds.push_back(true); - writeInBounds.push_back(true); - } else if (!resultType.isDynamicDim(i)) { - // Source shape is not statically known, but result shape is. - // Vectorize with size of result shape. This may be larger than the - // source size. - vecShape.push_back(resultType.getDimSize(i)); - // Read may be out-of-bounds because the result size could be larger - // than the source size. 
- readInBounds.push_back(false); - // Write is out-of-bounds if low padding > 0. - writeInBounds.push_back( - getConstantIntValue(padOp.getMixedLowPad()[i]) == - static_cast(0)); - } else { - // Neither source nor result dim of padOp is static. Cannot vectorize - // the copy. - return failure(); - } - } - auto vecType = VectorType::get(vecShape, sourceType.getElementType()); - - // Generate TransferReadOp. - SmallVector readIndices( - vecType.getRank(), - rewriter.create(padOp.getLoc(), 0)); - auto read = rewriter.create( - padOp.getLoc(), vecType, padOp.getSource(), readIndices, padValue, - ArrayRef{readInBounds}); - - // If `dest` is a FillOp and the TransferWriteOp would overwrite the - // entire tensor, write directly to the FillOp's operand. - if (llvm::equal(vecShape, resultType.getShape()) && - llvm::all_of(writeInBounds, [](bool b) { return b; })) - if (auto fill = dest.getDefiningOp()) - dest = fill.output(); - - // Generate TransferWriteOp. - auto writeIndices = - ofrToIndexValues(rewriter, padOp.getLoc(), padOp.getMixedLowPad()); - rewriter.replaceOpWithNewOp( - padOp, read, dest, writeIndices, ArrayRef{writeInBounds}); - - return success(); - } -}; - /// Base pattern for rewriting tensor::PadOps whose result is consumed by a /// given operation type OpTy. template @@ -2623,6 +2514,163 @@ struct PadOpVectorizationWithTransferWritePattern } }; +/// Returns the effective Pad value for the input op, provided it's a scalar. +/// +/// Many Ops exhibit pad-like behaviour, but this isn't always explicit. If +/// this Op performs padding, retrieve the padding value provided that it's +/// a scalar and static/fixed for all the padded values. Returns an empty value +/// otherwise. +static Value getStaticPadVal(Operation *op) { + if (!op) + return {}; + + // 1. vector.broadcast (f32 -> vector <...xf32>) - return the value that's + // being broadcast, provided that it's a scalar. 
+ if (auto bcast = llvm::dyn_cast(op)) { + auto source = bcast.getSource(); + if (llvm::dyn_cast(source.getType())) + return {}; + + return source; + } + + // 2. linalg.fill - use the scalar input value that used to fill the output + // tensor. + if (auto fill = llvm::dyn_cast(op)) { + return fill.getInputs()[0]; + } + + // 3. tensor.generateOp - can't guarantee the value is fixed without + // analysing, bail out. + if (auto generate = llvm::dyn_cast(op)) { + return {}; + } + + // 4. vector.transfer_write - inspect the input vector that's written from. If + // if contains a single value that has been broadcast (e.g. via + // vector.broadcast), extract it, fail otherwise. + if (auto xferWrite = llvm::dyn_cast(op)) + return getStaticPadVal(xferWrite.getVector().getDefiningOp()); + + // 5. tensor.insert_slice - inspect the destination tensor. If it's larger + // than the input tensor, then, provided it's constant, we'll extract the + // value that was used to generate it (via e.g. linalg.fill), fail otherwise. + // TODO: Clarify the semantics when the input tensor is larger than the + // destination. + if (auto slice = llvm::dyn_cast(op)) + return getStaticPadVal(slice.getDest().getDefiningOp()); + + return {}; +} + +/// Rewrite tensor.insert.slice as a vector.transfer_read + +/// vector.transfer_write pair. The vector size is inferred from the static +/// dims in the input and output tensors. If a dim is dynamic in both the input +/// and output tensors, bails out. +/// +/// Before: +/// !t_in_type = tensor<1x2x3xf32> +/// !t_out_type = tensor<9x8x7x1x2x3xf32> +/// !v_type = vector<1x2x3xf32> +/// %inserted_slice = tensor.insert_slice %src into %dest ... : !t_in_type +/// into !t_out_type +/// After: +/// %read = vector.transfer_read %src[...], %pad ... : !t_in_type, !v_type +/// %write = vector.transfer_write %read, %dest ... 
: !v_type, !t_out_type +/// +/// TODO: Support masking +struct InsertSliceVectorizePattern + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tensor::InsertSliceOp sliceOp, + PatternRewriter &rewriter) const final { + auto sourceType = sliceOp.getSource().getType(); + if (!VectorType::isValidElementType(sourceType.getElementType())) + return failure(); + + auto resultType = sliceOp.getResultType(); + + // 1. Get the pad value. + // TransferReadOp requires a scalar padding value. Note that: + // * for in-bounds access, the value is actually irrelevant. + // There are 2 cases in which xfer.read accesses are known to be in-bounds: + // 1. The source shape is static (output vector sizes would be based on + // the source shape and hence all memory accesses would be in-bounds), + // 2. Masking is used (output vector sizes would be user-provided, in which + // case it is assumed that all memory accesses are in-bounds). This + // remains a TODO. + // + // When the value is not known and not needed, use 0. Otherwise, bail out. + Value padValue = getStaticPadVal(sliceOp); + bool isOutOfBoundsRead = !sourceType.hasStaticShape(); + + if (!padValue && isOutOfBoundsRead) { + LDBG("Failed to get a pad value for out-of-bounds read access\n"); + return failure(); + } + + if (!padValue) { + auto elemType = sourceType.getElementType(); + padValue = rewriter.create( + sliceOp.getLoc(), elemType, rewriter.getZeroAttr(elemType)); + } + + // 2. Get the vector shape and in-bounds attributes + SmallVector vecShape; + SmallVector readInBounds; + SmallVector writeInBounds; + size_t rankDiff = resultType.getRank() - sourceType.getRank(); + for (unsigned i = 0; i < sourceType.getRank(); ++i) { + if (!sourceType.isDynamicDim(i)) { + vecShape.push_back(sourceType.getDimSize(i)); + // Source shape is statically known: Neither read nor write are + // out-of-bounds. 
+ readInBounds.push_back(true); + writeInBounds.push_back(true); + } else if (!resultType.isDynamicDim(i)) { + // Source shape is not statically known, but result shape is. + // Vectorize with size of result shape. This may be larger than the + // source size. + // FIXME: Using rankDiff implies that the source tensor is inserted at + // the end of the destination tensor. However, that's not required. + vecShape.push_back(resultType.getDimSize(rankDiff + i)); + // Read may be out-of-bounds because the result size could be larger + // than the source size. + readInBounds.push_back(false); + // Write will in-bounds provided that the corresponding write idx is 0. + // To keep this logic simple, conservatively mark as out-of-bounds. + writeInBounds.push_back(false); + } else { + // Neither source nor result dim of padOp is static. Cannot vectorize + // the copy. + // TODO: Add support for masking + return failure(); + } + } + auto vecType = VectorType::get(vecShape, sourceType.getElementType()); + + // 3. Generate TransferReadOp. + SmallVector readIndices( + vecType.getRank(), + rewriter.create(sliceOp.getLoc(), 0)); + auto read = rewriter.create( + sliceOp.getLoc(), vecType, sliceOp.getSource(), readIndices, padValue, + ArrayRef{readInBounds}); + + // 4. Generate TransferWriteOp. + auto writeIndices = getValueOrCreateConstantIndexOp( + rewriter, sliceOp.getLoc(), sliceOp.getMixedOffsets()); + + // 5. Finalize + rewriter.replaceOpWithNewOp( + sliceOp, read, sliceOp.getDest(), writeIndices, + ArrayRef{writeInBounds}); + + return success(); + } +}; + /// Rewrite use of tensor::PadOp result in InsertSliceOp. E.g.: /// ``` /// %0 = tensor.pad %src ... : tensor to tensor<17x5xf32> @@ -2699,8 +2747,8 @@ struct PadOpVectorizationWithInsertSlicePattern // Generate TransferWriteOp: Write to InsertSliceOp's dest tensor at // specified offsets. Write is fully in-bounds because a InsertSliceOp's // source must fit into the destination at the specified offsets. 
- auto writeIndices = - ofrToIndexValues(rewriter, padOp.getLoc(), insertOp.getMixedOffsets()); + auto writeIndices = getValueOrCreateConstantIndexOp( + rewriter, padOp.getLoc(), insertOp.getMixedOffsets()); SmallVector inBounds(vecRank, true); rewriter.replaceOpWithNewOp( insertOp, read, insertOp.getDest(), writeIndices, @@ -2710,13 +2758,18 @@ struct PadOpVectorizationWithInsertSlicePattern } }; +void mlir::linalg::populateInsertSliceVectorizationPatterns( + RewritePatternSet &patterns) { + patterns.add(patterns.getContext()); +} + void mlir::linalg::populatePadOpVectorizationPatterns( RewritePatternSet &patterns, PatternBenefit baseBenefit) { // TODO: The following pattern implements "decomposition" and // optional "vectorization". Seperate "decomposition" into a sepereate // pre-processing pattern group. - patterns.add(patterns.getContext(), - baseBenefit); + patterns.add(patterns.getContext(), baseBenefit); + // Try these specialized patterns first before resorting to the generic one. 
patterns.add, %arg1 : tensor<2xf32>) -> tensor<2x2xf32> +{ + // expected-error @+1 {{expected op with scalar input}} + %0 = linalg.fill ins(%arg1 : tensor<2xf32>) outs(%arg0 : tensor<2x2xf32>) -> tensor<2x2xf32> + return %0 : tensor<2x2xf32> +} + +// ----- + func.func @invalid_static_matmul(%arg0: memref<2x4xf32>, %arg1: memref<3x4xf32>, %arg2: memref<2x4xf32>) { // expected-error @+1 {{inferred input/output operand #1 has shape's dimension #0 to be 4, but found 3}} linalg.matmul ins(%arg0, %arg1 : memref<2x4xf32>, memref<3x4xf32>) diff --git a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir index 2aa4638af3f0f3b..640de85cc5f12e2 100644 --- a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir @@ -161,7 +161,8 @@ module attributes {transform.with_named_sequence} { ///---------------------------------------------------------------------------------------- /// tensor::PadOp -> tensor::EmptyOp + linalg::FillOp/tensor::GenerateOp + tensor::InsertSliceOp -/// [Pattern: GenericPadOpVectorizationPattern] +/// [Pattern: GenericPadOpVectorizationPattern + InsertSliceVectorizePattern] +/// TODO: Split the test into two, one for each pattern. 
///---------------------------------------------------------------------------------------- func.func private @make_vector() -> tensor<12x13xf32> @@ -174,12 +175,14 @@ func.func private @make_vector() -> tensor<12x13xf32> // CHECK-NOT: tensor.pad // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[PAD:.*]] = arith.constant 5.000000e+00 : f32 +// CHECK-DAG: %[[PAD_READ:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32> // CHECK: %[[FILL:.*]] = linalg.fill ins(%[[PAD]] : f32) outs(%[[EMPTY]] : tensor<1x12x13xf32>) -> tensor<1x12x13xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32> -// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32> +// CHECK: %[[READ_1:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32> +// CHECK: %[[WRITE_1:.*]] = vector.transfer_write %[[READ_1]], %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32> // CHECK: %[[VEC:.*]] = call @make_vector() : () -> tensor<12x13xf32> -// CHECK: %[[RES:.*]] = tensor.insert_slice %[[VEC]] into %[[WRITE]][0, 0, 0] [1, 12, 13] [1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32> +// CHECK: %[[READ_2:.*]] = vector.transfer_read %[[VEC]]{{\[}}%[[C0]], %[[C0]]], %[[PAD_READ]] {in_bounds = [true, true]} : tensor<12x13xf32>, vector<12x13xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ_2]], %[[WRITE_1]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<12x13xf32>, tensor<1x12x13xf32> // CHECK: return %[[RES]] : tensor<1x12x13xf32> func.func @pad_and_insert_slice_dest( diff --git 
a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir index e9f8e08ca0c6b40..8fbc74ec345c6be 100644 --- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics +// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s func.func @conv1d_nwc_wcf_dyn_ch_dim(%input: memref<4x6x?xf32>, %filter: memref<1x?x8xf32>, %output: memref<4x2x8xf32>) { // expected-error @+1 {{Attempted to vectorize, but failed}} @@ -253,3 +253,30 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +// With dynamically shaped source, the vectorizer infers the vector size for +// xfer Ops from the destination tensor and, conservatively, assumes +// out-of-bounds accesses. Out-of-bounds accesses require a pad value, but +// that's impossible to recover in this example. Hence no vectorization. 
+ +// TODO: Use diagnostics once we can vectorize tensor.insert_slice with +// transform.structured.vectorize + +// CHECK-LABEL: @insert_dynamic_slice_unknown_pad +// CHECK-NOT: vector +// CHECK: tensor.insert_slice +func.func @insert_dynamic_slice_unknown_pad(%arg0: tensor<1x?x3xf32>, %arg1: tensor<9x8x7x1x2x3xf32>, %size: index) -> tensor<9x8x7x1x2x3xf32> { + %res = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, %size, 3][1, 1, 1, 1, 1, 1] : tensor<1x?x3xf32> into tensor<9x8x7x1x2x3xf32> + return %res : tensor<9x8x7x1x2x3xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir index 189507d97d6dc2f..d2fb3730a2d2b06 100644 --- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir @@ -939,16 +939,20 @@ module attributes {transform.with_named_sequence} { func.func private @make_vector() -> tensor<12x13xf32> -// CHECK-LABEL: func @pad_and_insert_slice_dest -// CHECK-SAME: %[[ARG0:.*]]: tensor<1x5x6xf32> -// Check the insert slice is not rewritten if the padded result is used by the destination operand. 
-// CHECK-NOT: tensor.pad -// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32> -// CHECK: %[[WRITE_1:.*]] = vector.transfer_write %{{.*}}, %[[EMPTY]]{{.*}} : vector<1x12x13xf32>, tensor<1x12x13xf32> -// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0:.*]]{{.*}} : tensor<1x5x6xf32>, vector<1x5x6xf32> -// CHECK: %[[WRITE_2:.*]] = vector.transfer_write %[[READ]], %[[WRITE_1]]{{.*}} : vector<1x5x6xf32>, tensor<1x12x13xf32> -// CHECK: %[[T1:.*]] = call @make_vector() : () -> tensor<12x13xf32> -// CHECK: tensor.insert_slice %[[T1]] into %[[WRITE_2]] +// CHECK-LABEL: func.func @pad_and_insert_slice_dest( +// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { +// CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[CST:.*]] = arith.constant dense<5.000000e+00> : vector<1x12x13xf32> +// CHECK: %[[C0_IDX:.*]] = arith.constant 0 : index +// CHECK: %[[PAD_VAL:.*]] = arith.constant 5.000000e+00 : f32 +// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32> +// CHECK: %[[WRITE_1:.*]] = vector.transfer_write %[[CST]], %[[EMPTY]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true, true]} : vector<1x12x13xf32>, tensor<1x12x13xf32> +// CHECK: %[[READ_1:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]], %[[PAD_VAL]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32> +// CHECK: %[[WRITE_2:.*]] = vector.transfer_write %[[READ_1]], %[[WRITE_1]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32> +// CHECK: %[[MAKE_VEC:.*]] = call @make_vector() : () -> tensor<12x13xf32> +// CHECK: %[[READ_2:.*]] = vector.transfer_read %[[MAKE_VEC]]{{\[}}%[[C0_IDX]], %[[C0_IDX]]], %[[C0]] {in_bounds = [true, true]} : tensor<12x13xf32>, vector<12x13xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ_2]], %[[WRITE_2]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true]} : 
vector<12x13xf32>, tensor<1x12x13xf32> +// CHECK: return %[[RES]] : tensor<1x12x13xf32> func.func @pad_and_insert_slice_dest( %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> { %c5 = arith.constant 5.0 : f32 @@ -1924,3 +1928,94 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +///---------------------------------------------------------------------------------------- +/// tensor.insert_slice +///---------------------------------------------------------------------------------------- + +// The pad value for xfer-read is neither needed nor available - use the default (0.0). + +// CHECK-LABEL: func @insert_static_slice_default_pad +// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x2x3xf32>, +// CHECK-SAME: %[[ARG_1:.*]]: tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> { +// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[C0:.*]] = arith.constant 0 : index +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG_1]]{{\[}}%[[C0]], %[[C0]], %[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: return %[[WRITE]] : tensor<9x8x7x1x2x3xf32> +func.func @insert_static_slice_default_pad(%arg0: tensor<1x2x3xf32>, %arg1: tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> { + %res = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32> + return %res : tensor<9x8x7x1x2x3xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : 
(!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// Same as above, but there's a pad value available that should be used instead of the default value. + +// CHECK-LABEL: func.func @insert_static_slice_non_zero_pad +// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x2x3xf32>, +// CHECK-SAME: %[[PAD:.*]]: f32) -> tensor<9x8x7x1x2x3xf32> { +// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32> +// CHECK: %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: return %[[RES]] : tensor<9x8x7x1x2x3xf32> +func.func @insert_static_slice_non_zero_pad(%arg0: tensor<1x2x3xf32>, %pad : f32) -> tensor<9x8x7x1x2x3xf32> { + %init = tensor.empty() : tensor<9x8x7x1x2x3xf32> + %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> + %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32> + return %res : tensor<9x8x7x1x2x3xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 
= transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} + +// ----- + +// Same as above, but the source type has is dynamically shaped. This means +// that the pad value is now required and the vector dim corresponding to the +// dynamic shape has to be inferred from the shape of the destination tensor. + +// CHECK-LABEL: func.func @insert_dynamic_slice_non_zero_pad( +// CHECK-SAME: %[[ARG_0:.*]]: tensor<1x?x3xf32>, +// CHECK-SAME: %[[PAD:.*]]: f32, +// CHECK-SAME: %[[SIZE:.*]]: index) -> tensor<9x8x7x1x2x3xf32> { +// CHECK: %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32> +// CHECK: %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32> +// CHECK: %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, false, true]} : tensor<1x?x3xf32>, vector<1x2x3xf32> +// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32> +// CHECK: return %[[RES]] : tensor<9x8x7x1x2x3xf32> +func.func @insert_dynamic_slice_non_zero_pad(%arg0: tensor<1x?x3xf32>, %pad : f32, %size: index) -> tensor<9x8x7x1x2x3xf32> { + %init = tensor.empty() : tensor<9x8x7x1x2x3xf32> + %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> + %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, %size, 3][1, 1, 1, 1, 1, 1] : tensor<1x?x3xf32> into tensor<9x8x7x1x2x3xf32> + return %res : tensor<9x8x7x1x2x3xf32> +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) 
-> !transform.any_op + %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op + %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op + transform.yield + } +} From 12a8f504cfe25afab97e288a44e1d5b1925d24cf Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Tue, 29 Oct 2024 09:56:15 -0700 Subject: [PATCH 315/425] [SandboxIR] Use the proper gmock public header in unit tests. This should fix the BuildKite bazel build. --- llvm/unittests/SandboxIR/SandboxIRTest.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp index 99e14292a91b927..874c32c2d4398ff 100644 --- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp +++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp @@ -21,8 +21,7 @@ #include "llvm/SandboxIR/Utils.h" #include "llvm/SandboxIR/Value.h" #include "llvm/Support/SourceMgr.h" -#include "gmock/gmock-matchers.h" -#include "gmock/gmock-more-matchers.h" +#include "gmock/gmock.h" #include "gtest/gtest.h" using namespace llvm; From 0b700f23335e9206e1e460a477df2103ce3c186d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 29 Oct 2024 10:01:49 -0700 Subject: [PATCH 316/425] [flang][cuda] Add entry point to launch global function with cluster_dims (#113958) --- flang/include/flang/Runtime/CUDA/kernel.h | 8 +++++++- flang/runtime/CUDA/kernel.cpp | 25 ++++++++++++++++++++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/flang/include/flang/Runtime/CUDA/kernel.h b/flang/include/flang/Runtime/CUDA/kernel.h index cf07d874a082c0b..85afda09e347ae4 100644 --- a/flang/include/flang/Runtime/CUDA/kernel.h +++ b/flang/include/flang/Runtime/CUDA/kernel.h @@ -15,13 +15,19 @@ extern "C" { -// This function uses intptr_t 
instead of CUDA's unsigned int to match +// These functions use intptr_t instead of CUDA's unsigned int to match // the type of MLIR's index type. This avoids the need for casts in the // generated MLIR code. + void RTDEF(CUFLaunchKernel)(const void *kernelName, intptr_t gridX, intptr_t gridY, intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ, int32_t smem, void **params, void **extra); +void RTDEF(CUFLaunchClusterKernel)(const void *kernelName, intptr_t clusterX, + intptr_t clusterY, intptr_t clusterZ, intptr_t gridX, intptr_t gridY, + intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ, + int32_t smem, void **params, void **extra); + } // extern "C" #endif // FORTRAN_RUNTIME_CUDA_KERNEL_H_ diff --git a/flang/runtime/CUDA/kernel.cpp b/flang/runtime/CUDA/kernel.cpp index f81153a1af4bc77..abb7ebb72e59231 100644 --- a/flang/runtime/CUDA/kernel.cpp +++ b/flang/runtime/CUDA/kernel.cpp @@ -25,9 +25,32 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY, blockDim.x = blockX; blockDim.y = blockY; blockDim.z = blockZ; - cudaStream_t stream = 0; + cudaStream_t stream = 0; // TODO stream managment CUDA_REPORT_IF_ERROR( cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, stream)); } +void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX, + intptr_t clusterY, intptr_t clusterZ, intptr_t gridX, intptr_t gridY, + intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ, + int32_t smem, void **params, void **extra) { + cudaLaunchConfig_t config; + config.gridDim.x = gridX; + config.gridDim.y = gridY; + config.gridDim.z = gridZ; + config.blockDim.x = blockX; + config.blockDim.y = blockY; + config.blockDim.z = blockZ; + config.dynamicSmemBytes = smem; + config.stream = 0; // TODO stream managment + cudaLaunchAttribute launchAttr[1]; + launchAttr[0].id = cudaLaunchAttributeClusterDimension; + launchAttr[0].val.clusterDim.x = clusterX; + launchAttr[0].val.clusterDim.y = clusterY; + 
launchAttr[0].val.clusterDim.z = clusterZ; + config.numAttrs = 1; + config.attrs = launchAttr; + CUDA_REPORT_IF_ERROR(cudaLaunchKernelExC(&config, kernel, params)); +} + } // extern "C" From b05fec97d59898a63a3e303122bbc7fc5e29ced8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 29 Oct 2024 10:02:08 -0700 Subject: [PATCH 317/425] [flang][cuda] Convert gpu.launch_func to CUFLaunchClusterKernel when cluster dims are present (#113959) Kernel launch in CUF are converted to `gpu.launch_func`. When the kernel has `cluster_dims` specified these get carried over to the `gpu.launch_func` operation. This patch updates the special conversion of `gpu.launch_func` when cluster dims are present to the newly added entry point. --- .../Transforms/CUFGPUToLLVMConversion.cpp | 83 ++++++++++++------- flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir | 24 +++++- 2 files changed, 76 insertions(+), 31 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp index 5645ce6e6858c82..c64f35542a6e590 100644 --- a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp @@ -76,11 +76,6 @@ struct GPULaunchKernelConversion mlir::LogicalResult matchAndRewrite(mlir::gpu::LaunchFuncOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - - if (op.hasClusterSize()) { - return mlir::failure(); - } - mlir::Location loc = op.getLoc(); auto *ctx = rewriter.getContext(); mlir::ModuleOp mod = op->getParentOfType(); @@ -107,37 +102,65 @@ struct GPULaunchKernelConversion rewriter.create(loc, ptrTy, kernel.getName()); } - auto funcOp = mod.lookupSymbol( - RTNAME_STRING(CUFLaunchKernel)); - auto llvmIntPtrType = mlir::IntegerType::get( ctx, 
this->getTypeConverter()->getPointerBitwidth(0)); auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx); - auto funcTy = mlir::LLVM::LLVMFunctionType::get( - voidTy, - {ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, - llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy}, - /*isVarArg=*/false); - - auto cufLaunchKernel = mlir::SymbolRefAttr::get( - mod.getContext(), RTNAME_STRING(CUFLaunchKernel)); - if (!funcOp) { - mlir::OpBuilder::InsertionGuard insertGuard(rewriter); - rewriter.setInsertionPointToStart(mod.getBody()); - auto launchKernelFuncOp = rewriter.create( - loc, RTNAME_STRING(CUFLaunchKernel), funcTy); - launchKernelFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private); - } mlir::Value nullPtr = rewriter.create(loc, ptrTy); - rewriter.replaceOpWithNewOp( - op, funcTy, cufLaunchKernel, - mlir::ValueRange{kernelPtr, adaptor.getGridSizeX(), - adaptor.getGridSizeY(), adaptor.getGridSizeZ(), - adaptor.getBlockSizeX(), adaptor.getBlockSizeY(), - adaptor.getBlockSizeZ(), dynamicMemorySize, kernelArgs, - nullPtr}); + if (op.hasClusterSize()) { + auto funcOp = mod.lookupSymbol( + RTNAME_STRING(CUFLaunchClusterKernel)); + auto funcTy = mlir::LLVM::LLVMFunctionType::get( + voidTy, + {ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, + llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, + llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy}, + /*isVarArg=*/false); + auto cufLaunchClusterKernel = mlir::SymbolRefAttr::get( + mod.getContext(), RTNAME_STRING(CUFLaunchClusterKernel)); + if (!funcOp) { + mlir::OpBuilder::InsertionGuard insertGuard(rewriter); + rewriter.setInsertionPointToStart(mod.getBody()); + auto launchKernelFuncOp = rewriter.create( + loc, RTNAME_STRING(CUFLaunchClusterKernel), funcTy); + launchKernelFuncOp.setVisibility( + mlir::SymbolTable::Visibility::Private); + } + rewriter.replaceOpWithNewOp( + op, funcTy, cufLaunchClusterKernel, + mlir::ValueRange{kernelPtr, adaptor.getClusterSizeX(), + 
adaptor.getClusterSizeY(), adaptor.getClusterSizeZ(), + adaptor.getGridSizeX(), adaptor.getGridSizeY(), + adaptor.getGridSizeZ(), adaptor.getBlockSizeX(), + adaptor.getBlockSizeY(), adaptor.getBlockSizeZ(), + dynamicMemorySize, kernelArgs, nullPtr}); + } else { + auto funcOp = mod.lookupSymbol( + RTNAME_STRING(CUFLaunchKernel)); + auto funcTy = mlir::LLVM::LLVMFunctionType::get( + voidTy, + {ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, + llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy}, + /*isVarArg=*/false); + auto cufLaunchKernel = mlir::SymbolRefAttr::get( + mod.getContext(), RTNAME_STRING(CUFLaunchKernel)); + if (!funcOp) { + mlir::OpBuilder::InsertionGuard insertGuard(rewriter); + rewriter.setInsertionPointToStart(mod.getBody()); + auto launchKernelFuncOp = rewriter.create( + loc, RTNAME_STRING(CUFLaunchKernel), funcTy); + launchKernelFuncOp.setVisibility( + mlir::SymbolTable::Visibility::Private); + } + rewriter.replaceOpWithNewOp( + op, funcTy, cufLaunchKernel, + mlir::ValueRange{kernelPtr, adaptor.getGridSizeX(), + adaptor.getGridSizeY(), adaptor.getGridSizeZ(), + adaptor.getBlockSizeX(), adaptor.getBlockSizeY(), + adaptor.getBlockSizeZ(), dynamicMemorySize, + kernelArgs, nullPtr}); + } return mlir::success(); } diff --git a/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir b/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir index f10bd82f978dc4d..7fede7c6c17b780 100644 --- a/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir +++ b/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir @@ -1,4 +1,4 @@ -// RUN: fir-opt --cuf-gpu-convert-to-llvm %s | FileCheck %s +// RUN: fir-opt --split-input-file --cuf-gpu-convert-to-llvm %s | FileCheck %s module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, 
#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git ddcfd4d2dc17bf66cee8c3ef6284118684a2b0e6)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { llvm.func @_QMmod1Phost_sub() { @@ -102,3 +102,25 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : ve // CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1 : !llvm.ptr // CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], {{.*}}) + +// ----- + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git 4116c1370ff76adf1e58eb3c39d0a14721794c70)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { + llvm.func 
@_FortranACUFLaunchClusterKernel(!llvm.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, !llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"} + llvm.func @_QMmod1Psub1() attributes {cuf.cluster_dims = #cuf.cluster_dims} { + llvm.return + } + llvm.func @_QQmain() attributes {fir.bindc_name = "test"} { + %0 = llvm.mlir.constant(1 : index) : i64 + %1 = llvm.mlir.constant(2 : index) : i64 + %2 = llvm.mlir.constant(0 : i32) : i32 + %3 = llvm.mlir.constant(10 : index) : i64 + gpu.launch_func @cuda_device_mod::@_QMmod1Psub1 clusters in (%1, %1, %0) blocks in (%3, %3, %0) threads in (%3, %3, %0) : i64 dynamic_shared_memory_size %2 + llvm.return + } + gpu.binary @cuda_device_mod [#gpu.object<#nvvm.target, "">] +} + +// CHECK-LABEL: llvm.func @_QQmain() +// CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1 +// CHECK: llvm.call @_FortranACUFLaunchClusterKernel(%[[KERNEL_PTR]], {{.*}}) From a1f2fb6078bbed8034ce28eafc3518268e25f2ff Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Tue, 29 Oct 2024 17:05:12 +0000 Subject: [PATCH 318/425] [MLIR][OpenMP] Prevent composite omp.simd related crashes (#113680) This patch updates the translation of `omp.wsloop` with a nested `omp.simd` to prevent uses of block arguments defined by the latter from triggering null pointer dereferences. This happens because the inner `omp.simd` operation representing composite `do simd` constructs is currently skipped and not translated, but this results in block arguments defined by it not being mapped to an LLVM value. The proposed solution is to map these block arguments to the LLVM value associated to the corresponding operand, which is defined above. 
--- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 66 ++++++++++++++- mlir/test/Target/LLVMIR/openmp-reduction.mlir | 80 +++++++++++++++++++ 2 files changed, 143 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index fc2f88b766f1c56..d20e5e40076bc3f 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -262,6 +262,62 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { llvm_unreachable("Unknown ClauseProcBindKind kind"); } +/// Helper function to map block arguments defined by ignored loop wrappers to +/// LLVM values and prevent any uses of those from triggering null pointer +/// dereferences. +/// +/// This must be called after block arguments of parent wrappers have already +/// been mapped to LLVM IR values. +static LogicalResult +convertIgnoredWrapper(omp::LoopWrapperInterface &opInst, + LLVM::ModuleTranslation &moduleTranslation) { + // Map block arguments directly to the LLVM value associated to the + // corresponding operand. This is semantically equivalent to this wrapper not + // being present. 
+ auto forwardArgs = + [&moduleTranslation](llvm::ArrayRef blockArgs, + OperandRange operands) { + for (auto [arg, var] : llvm::zip_equal(blockArgs, operands)) + moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var)); + }; + + return llvm::TypeSwitch(opInst) + .Case([&](omp::SimdOp op) { + auto blockArgIface = cast(*op); + forwardArgs(blockArgIface.getPrivateBlockArgs(), op.getPrivateVars()); + forwardArgs(blockArgIface.getReductionBlockArgs(), + op.getReductionVars()); + return success(); + }) + .Default([&](Operation *op) { + return op->emitError() << "cannot ignore nested wrapper"; + }); +} + +/// Helper function to call \c convertIgnoredWrapper() for all wrappers of the +/// given \c loopOp nested inside of \c parentOp. This has the effect of mapping +/// entry block arguments defined by these operations to outside values. +/// +/// It must be called after block arguments of \c parentOp have already been +/// mapped themselves. +static LogicalResult +convertIgnoredWrappers(omp::LoopNestOp loopOp, + omp::LoopWrapperInterface parentOp, + LLVM::ModuleTranslation &moduleTranslation) { + SmallVector wrappers; + loopOp.gatherWrappers(wrappers); + + // Process wrappers nested inside of `parentOp` from outermost to innermost. + for (auto it = + std::next(std::find(wrappers.rbegin(), wrappers.rend(), parentOp)); + it != wrappers.rend(); ++it) { + if (failed(convertIgnoredWrapper(*it, moduleTranslation))) + return failure(); + } + + return success(); +} + /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, @@ -1262,9 +1318,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, !wsloopOp.getPrivateVars().empty() || wsloopOp.getPrivateSyms()) return opInst.emitError("unhandled clauses for translation to LLVM IR"); - // FIXME: Here any other nested wrappers (e.g. 
omp.simd) are skipped, so - // codegen for composite constructs like 'DO/FOR SIMD' will be the same as for - // 'DO/FOR'. auto loopOp = cast(wsloopOp.getWrappedLoop()); llvm::ArrayRef isByRef = getIsByRef(wsloopOp.getReductionByref()); @@ -1302,6 +1355,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, isByRef))) return failure(); + // TODO: Replace this with proper composite translation support. + // Currently, all nested wrappers are ignored, so 'do/for simd' will be + // treated the same as a standalone 'do/for'. This is allowed by the spec, + // since it's equivalent to always using a SIMD length of 1. + if (failed(convertIgnoredWrappers(loopOp, wsloopOp, moduleTranslation))) + return failure(); + // Store the mapping between reduction variables and their private copies on // ModuleTranslation stack. It can be then recovered when translating // omp.reduce operations in a separate call. diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir index 6d74a925b87b5c9..11c8559044be025 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir @@ -586,3 +586,83 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) { // Reduction function. 
// CHECK: define internal void @[[REDFUNC]] // CHECK: add i32 + +// ----- + +omp.declare_reduction @add_f32 : f32 +init { +^bb0(%arg: f32): + %0 = llvm.mlir.constant(0.0 : f32) : f32 + omp.yield (%0 : f32) +} +combiner { +^bb1(%arg0: f32, %arg1: f32): + %1 = llvm.fadd %arg0, %arg1 : f32 + omp.yield (%1 : f32) +} +atomic { +^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr): + %2 = llvm.load %arg3 : !llvm.ptr -> f32 + llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32 + omp.yield +} + +// CHECK-LABEL: @wsloop_simd_reduction +llvm.func @wsloop_simd_reduction(%lb : i64, %ub : i64, %step : i64) { + %c1 = llvm.mlir.constant(1 : i32) : i32 + %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr + omp.parallel { + omp.wsloop reduction(@add_f32 %0 -> %prv1 : !llvm.ptr) { + omp.simd reduction(@add_f32 %prv1 -> %prv2 : !llvm.ptr) { + omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) { + %1 = llvm.mlir.constant(2.0 : f32) : f32 + %2 = llvm.load %prv2 : !llvm.ptr -> f32 + %3 = llvm.fadd %1, %2 : f32 + llvm.store %3, %prv2 : f32, !llvm.ptr + omp.yield + } + } {omp.composite} + } {omp.composite} + omp.terminator + } + llvm.return +} + +// Same checks as for wsloop reduction, because currently omp.simd is ignored in +// a composite 'do/for simd' construct. +// Call to the outlined function. +// CHECK: call void {{.*}} @__kmpc_fork_call +// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]] + +// Outlined function. +// CHECK: define internal void @[[OUTLINED]] + +// Private reduction variable and its initialization. +// CHECK: %[[PRIVATE:.+]] = alloca float +// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]] + +// Call to the reduction function. +// CHECK: call i32 @__kmpc_reduce +// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]] + +// Atomic reduction. 
+// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]] +// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]] + +// Non-atomic reduction: +// CHECK: fadd float +// CHECK: call void @__kmpc_end_reduce +// CHECK: br label %[[FINALIZE:.+]] + +// CHECK: [[FINALIZE]]: +// CHECK: call void @__kmpc_barrier + +// Update of the private variable using the reduction region +// (the body block currently comes after all the other blocks). +// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]] +// CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]] +// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]] + +// Reduction function. +// CHECK: define internal void @[[REDFUNC]] +// CHECK: fadd float From f53889ffcad28bbc0faf671626cc90eb4e7da5a8 Mon Sep 17 00:00:00 2001 From: Jubilee Date: Tue, 29 Oct 2024 10:07:20 -0700 Subject: [PATCH 319/425] [RISCV] Allow crypto features to imply dependents (#112659) This relationship is a logical dependency. Note Zvbc and Zvknhb. They are explicitly called out in the spec as requiring 64 bits: - https://github.com/riscv/riscv-crypto/blob/56ed7952d13eb5bdff92e2b522404668952f416d/doc/vector/riscv-crypto-spec-vector.adoc --- llvm/lib/Target/RISCV/RISCVFeatures.td | 27 +++++--- .../TargetParser/RISCVISAInfoTest.cpp | 68 ++++--------------- 2 files changed, 33 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 608782d7839a9fa..6f43c832fd4d2a2 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -733,7 +733,8 @@ def HasStdExtZfhOrZvfh def FeatureStdExtZvkb : RISCVExtension<"zvkb", 1, 0, - "'Zvkb' (Vector Bit-manipulation used in Cryptography)">, + "'Zvkb' (Vector Bit-manipulation used in Cryptography)", + [FeatureStdExtZve32x]>, RISCVExtensionBitmask<0, 52>; def HasStdExtZvkb : Predicate<"Subtarget->hasStdExtZvkb()">, AssemblerPredicate<(all_of FeatureStdExtZvkb), @@ -750,7 +751,8 @@ def HasStdExtZvbb : 
Predicate<"Subtarget->hasStdExtZvbb()">, def FeatureStdExtZvbc : RISCVExtension<"zvbc", 1, 0, - "'Zvbc' (Vector Carryless Multiplication)">, + "'Zvbc' (Vector Carryless Multiplication)", + [FeatureStdExtZve64x]>, RISCVExtensionBitmask<0, 49>; def HasStdExtZvbc : Predicate<"Subtarget->hasStdExtZvbc()">, AssemblerPredicate<(all_of FeatureStdExtZvbc), @@ -758,7 +760,8 @@ def HasStdExtZvbc : Predicate<"Subtarget->hasStdExtZvbc()">, def FeatureStdExtZvbc32e : RISCVExperimentalExtension<"zvbc32e", 0, 7, - "'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements)">; + "'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements)", + [FeatureStdExtZve32x]>; def HasStdExtZvbcOrZvbc32e : Predicate<"Subtarget->hasStdExtZvbc() || Subtarget->hasStdExtZvbc32e()">, AssemblerPredicate<(any_of FeatureStdExtZvbc, FeatureStdExtZvbc32e), @@ -766,7 +769,8 @@ def HasStdExtZvbcOrZvbc32e : Predicate<"Subtarget->hasStdExtZvbc() || Subtarget- def FeatureStdExtZvkg : RISCVExtension<"zvkg", 1, 0, - "'Zvkg' (Vector GCM instructions for Cryptography)">, + "'Zvkg' (Vector GCM instructions for Cryptography)", + [FeatureStdExtZve32x]>, RISCVExtensionBitmask<0, 53>; def HasStdExtZvkg : Predicate<"Subtarget->hasStdExtZvkg()">, AssemblerPredicate<(all_of FeatureStdExtZvkg), @@ -782,7 +786,8 @@ def HasStdExtZvkgs : Predicate<"Subtarget->hasStdExtZvkgs()">, def FeatureStdExtZvkned : RISCVExtension<"zvkned", 1, 0, - "'Zvkned' (Vector AES Encryption & Decryption (Single Round))">, + "'Zvkned' (Vector AES Encryption & Decryption (Single Round))", + [FeatureStdExtZve32x]>, RISCVExtensionBitmask<0, 54>; def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">, AssemblerPredicate<(all_of FeatureStdExtZvkned), @@ -790,7 +795,8 @@ def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">, def FeatureStdExtZvknha : RISCVExtension<"zvknha", 1, 0, - "'Zvknha' (Vector SHA-2 (SHA-256 only))">, + "'Zvknha' (Vector SHA-2 (SHA-256 only))", + [FeatureStdExtZve32x]>, RISCVExtensionBitmask<0, 
55>; def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">, AssemblerPredicate<(all_of FeatureStdExtZvknha), @@ -798,7 +804,8 @@ def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">, def FeatureStdExtZvknhb : RISCVExtension<"zvknhb", 1, 0, - "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))">, + "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))", + [FeatureStdExtZve64x]>, RISCVExtensionBitmask<0, 56>; def HasStdExtZvknhb : Predicate<"Subtarget->hasStdExtZvknhb()">, AssemblerPredicate<(all_of FeatureStdExtZvknhb), @@ -810,7 +817,8 @@ def HasStdExtZvknhaOrZvknhb : Predicate<"Subtarget->hasStdExtZvknha() || Subtarg def FeatureStdExtZvksed : RISCVExtension<"zvksed", 1, 0, - "'Zvksed' (SM4 Block Cipher Instructions)">, + "'Zvksed' (SM4 Block Cipher Instructions)", + [FeatureStdExtZve32x]>, RISCVExtensionBitmask<0, 57>; def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">, AssemblerPredicate<(all_of FeatureStdExtZvksed), @@ -818,7 +826,8 @@ def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">, def FeatureStdExtZvksh : RISCVExtension<"zvksh", 1, 0, - "'Zvksh' (SM3 Hash Function Instructions)">, + "'Zvksh' (SM3 Hash Function Instructions)", + [FeatureStdExtZve32x]>, RISCVExtensionBitmask<0, 58>; def HasStdExtZvksh : Predicate<"Subtarget->hasStdExtZvksh()">, AssemblerPredicate<(all_of FeatureStdExtZvksh), diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp index a1d493e12fda6df..30f80601d96cbba 100644 --- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp +++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp @@ -643,60 +643,22 @@ TEST(ParseArchString, MissingDepency) { "'zvl*b' requires 'v' or 'zve*' extension to also be specified"); } - for (StringRef Input : {"rv32i_zvbb"}) { + // These all have an implication relationship, thus should pass + for (StringRef Input : { + "rv32i_zvbb", + "rv32i_zvbc32e0p7", + "rv32i_zvbc", + "rv32i_zvkb", + "rv32i_zvkg", + "rv32i_zvkgs0p7", + 
"rv32i_zvkned", + "rv32i_zvknha", + "rv32i_zvksed", + "rv32i_zvksh", + "rv32i_zvknhb", + }) { EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvbb' requires 'v' or 'zve*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvbc32e0p7"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvbc32e' requires 'v' or 'zve*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvbc"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvbc' requires 'v' or 'zve64*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvkb"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvkb' requires 'v' or 'zve*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvkg"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvkg' requires 'v' or 'zve*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvkgs0p7"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvkg' requires 'v' or 'zve*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvkned"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvkned' requires 'v' or 'zve*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvknha"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvknha' requires 'v' or 'zve*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvksed"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvksed' requires 'v' or 'zve*' extension to also be specified"); - } - - for (StringRef Input : {"rv32i_zvksh"}) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvksh' requires 'v' or 'zve*' extension to also be 
specified"); - } - - for (StringRef Input : {"rv32i_zvknhb"}) { - EXPECT_EQ( - toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "'zvknhb' requires 'v' or 'zve64*' extension to also be specified"); + ""); } for (StringRef Input : {"rv32i_zacas1p0"}) { From b1d0fe095ba93df47b5db20a3bd55f9ff857836e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Oct 2024 10:09:28 -0700 Subject: [PATCH 320/425] [RISCV] Remove trailing whitespace. NFC --- llvm/lib/Target/RISCV/RISCVFeatures.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 6f43c832fd4d2a2..1e4bf1b8830bcc7 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1040,7 +1040,7 @@ def FeatureStdExtSvpbmt def FeatureStdExtSvvptc : RISCVExtension<"svvptc", 1, 0, - "'svvptc' (Obviating Memory-Management Instructions after Marking PTEs Valid)">; + "'svvptc' (Obviating Memory-Management Instructions after Marking PTEs Valid)">; def FeatureStdExtSha : RISCVExtension<"sha", 1, 0, From f964514490ecf6d57dc9f53ebda913a6fe1e3abd Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 29 Oct 2024 13:16:20 -0400 Subject: [PATCH 321/425] Nominate Shafik Yaghmour and Vlad Serebrennikov for C++ conformance (#114071) Shafik and Vlad are both members of WG21 and both have familiarity with reasoning about the C++ standard. They've both volunteered to help answer conformance related questions, and this is an area where we get quite a bit of questions so having a larger stable of maintainers is quite useful. 
--- clang/Maintainers.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 39f46457e676a80..08dcc584f6c5748 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -231,6 +231,12 @@ C++ conformance | Hubert Tong | hubert.reinterpretcast\@gmail.com (email), hubert.reinterpretcast (Phabricator), hubert-reinterpretcast (GitHub) +| Shafik Yaghmour +| shafik.yaghmour\@intel.com (email), shafik (GitHub), shafik.yaghmour (Discord), shafik (Discourse) + +| Vlad Serebrennikov +| serebrennikov.vladislav\@gmail.com (email), Endilll (GitHub), Endill (Discord), Endill (Discourse) + C++ Defect Reports ~~~~~~~~~~~~~~~~~~ From 9a5b3a1bbca6790602ec3291da850fc4485cc807 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Tue, 29 Oct 2024 10:17:35 -0700 Subject: [PATCH 322/425] [DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic (#111884) fixes #112974 partially fixes #70103 ### Changes - Added new tablegen based way of lowering dx intrinsics to DXIL ops. 
- Added int_dx_group_memory_barrier_with_group_sync intrinsic in IntrinsicsDirectX.td - Added expansion for int_dx_group_memory_barrier_with_group_sync in DXILIntrinsicExpansion.cpp` - Added DXIL backend test case ### Related PRs * [[clang][HLSL] Add GroupMemoryBarrierWithGroupSync intrinsic #111883](https://github.com/llvm/llvm-project/pull/111883) * [[SPIRV] Add GroupMemoryBarrierWithGroupSync intrinsic #111888](https://github.com/llvm/llvm-project/pull/111888) --- llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 + llvm/lib/Target/DirectX/DXIL.td | 54 ++++++++ llvm/lib/Target/DirectX/DXILOpLowering.cpp | 45 +++++-- .../group_memory_barrier_with_group_sync.ll | 8 ++ llvm/utils/TableGen/DXILEmitter.cpp | 122 ++++++++++++++++-- 5 files changed, 209 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index e30d37f69f781ea..dada426368995d7 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -92,4 +92,6 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>; def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + +def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>; } diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 1e8dc63ffa257e1..263ca50011aa7b5 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -294,6 +294,43 @@ class Attributes attrs> { list op_attrs = attrs; } +class DXILConstant { + int value = value_; +} + +defset list BarrierModes = { + def BarrierMode_DeviceMemoryBarrier : DXILConstant<2>; + def 
BarrierMode_DeviceMemoryBarrierWithGroupSync : DXILConstant<3>; + def BarrierMode_GroupMemoryBarrier : DXILConstant<8>; + def BarrierMode_GroupMemoryBarrierWithGroupSync : DXILConstant<9>; + def BarrierMode_AllMemoryBarrier : DXILConstant<10>; + def BarrierMode_AllMemoryBarrierWithGroupSync : DXILConstant<11>; +} + +// Intrinsic arg selection +class Arg { + int index = -1; + DXILConstant value; + bit is_i8 = 0; + bit is_i32 = 0; +} +class ArgSelect : Arg { + let index = index_; +} +class ArgI32 : Arg { + let value = value_; + let is_i32 = 1; +} +class ArgI8 : Arg { + let value = value_; + let is_i8 = 1; +} + +class IntrinsicSelect args_> { + Intrinsic intrinsic = intrinsic_; + list args = args_; +} + // Abstraction DXIL Operation class DXILOp { // A short description of the operation @@ -308,6 +345,9 @@ class DXILOp { // LLVM Intrinsic DXIL Operation maps to Intrinsic LLVMIntrinsic = ?; + // Non-trivial LLVM Intrinsics DXIL Operation maps to + list intrinsic_selects = []; + // Result type of the op DXILOpParamType result; @@ -829,3 +869,17 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> { let stages = [Stages]; let attributes = [Attributes]; } + +def Barrier : DXILOp<80, barrier> { + let Doc = "inserts a memory barrier in the shader"; + let intrinsic_selects = [ + IntrinsicSelect< + int_dx_group_memory_barrier_with_group_sync, + [ ArgI32 ]>, + ]; + + let arguments = [Int32Ty]; + let result = VoidTy; + let stages = [Stages]; + let attributes = [Attributes]; +} diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 8acc9c1efa08c08..b5cf1654181c6c7 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -106,17 +106,43 @@ class OpLowerer { return false; } - [[nodiscard]] - bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) { + struct ArgSelect { + enum class Type { + Index, + I8, + I32, + }; + Type Type = Type::Index; + int Value = -1; + }; + + 
[[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp, + ArrayRef ArgSelects) { bool IsVectorArgExpansion = isVectorArgExpansion(F); return replaceFunction(F, [&](CallInst *CI) -> Error { - SmallVector Args; OpBuilder.getIRB().SetInsertPoint(CI); - if (IsVectorArgExpansion) { - SmallVector NewArgs = argVectorFlatten(CI, OpBuilder.getIRB()); - Args.append(NewArgs.begin(), NewArgs.end()); - } else + SmallVector Args; + if (ArgSelects.size()) { + for (const ArgSelect &A : ArgSelects) { + switch (A.Type) { + case ArgSelect::Type::Index: + Args.push_back(CI->getArgOperand(A.Value)); + break; + case ArgSelect::Type::I8: + Args.push_back(OpBuilder.getIRB().getInt8((uint8_t)A.Value)); + break; + case ArgSelect::Type::I32: + Args.push_back(OpBuilder.getIRB().getInt32(A.Value)); + break; + default: + llvm_unreachable("Invalid type of intrinsic arg select."); + } + } + } else if (IsVectorArgExpansion) { + Args = argVectorFlatten(CI, OpBuilder.getIRB()); + } else { Args.append(CI->arg_begin(), CI->arg_end()); + } Expected OpCall = OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType()); @@ -583,9 +609,10 @@ class OpLowerer { switch (ID) { default: continue; -#define DXIL_OP_INTRINSIC(OpCode, Intrin) \ +#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...) 
\ case Intrin: \ - HasErrors |= replaceFunctionWithOp(F, OpCode); \ + HasErrors |= \ + replaceFunctionWithOp(F, OpCode, ArrayRef{__VA_ARGS__}); \ break; #include "DXILOperation.inc" case Intrinsic::dx_handle_fromBinding: diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll new file mode 100644 index 000000000000000..baf93d4e177f0fa --- /dev/null +++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll @@ -0,0 +1,8 @@ +; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s + +define void @test_group_memory_barrier_with_group_sync() { +entry: + ; CHECK: call void @dx.op.barrier(i32 80, i32 9) + call void @llvm.dx.group.memory.barrier.with.group.sync() + ret void +} \ No newline at end of file diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 467a6163ae3b0c1..8594233244638d0 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -32,6 +32,20 @@ using namespace llvm::dxil; namespace { +struct DXILArgSelect { + enum class Type { + Index, + I32, + I8, + }; + Type Type = Type::Index; + int Value = -1; +}; +struct DXILIntrinsicSelect { + StringRef Intrinsic; + SmallVector Args; +}; + struct DXILOperationDesc { std::string OpName; // name of DXIL operation int OpCode; // ID of DXIL operation @@ -42,8 +56,7 @@ struct DXILOperationDesc { SmallVector OverloadRecs; SmallVector StageRecs; SmallVector AttrRecs; - StringRef Intrinsic; // The llvm intrinsic map to OpName. Default is "" which - // means no map exists + SmallVector IntrinsicSelects; SmallVector ShaderStages; // shader stages to which this applies, empty for all. int OverloadParamIndex; // Index of parameter with overload type. 
@@ -71,6 +84,21 @@ static void AscendingSortByVersion(std::vector &Recs) { }); } +/// Take a `int_{intrinsic_name}` and return just the intrinsic_name part if +/// available. Otherwise return the empty string. +static StringRef GetIntrinsicName(const RecordVal *RV) { + if (RV && RV->getValue()) { + if (const DefInit *DI = dyn_cast(RV->getValue())) { + auto *IntrinsicDef = DI->getDef(); + auto DefName = IntrinsicDef->getName(); + assert(DefName.starts_with("int_") && "invalid intrinsic name"); + // Remove the int_ from intrinsic name. + return DefName.substr(4); + } + } + return ""; +} + /// Construct an object using the DXIL Operation records specified /// in DXIL.td. This serves as the single source of reference of /// the information extracted from the specified Record R, for @@ -157,14 +185,63 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { OpName); } - const RecordVal *RV = R->getValue("LLVMIntrinsic"); - if (RV && RV->getValue()) { - if (const DefInit *DI = dyn_cast(RV->getValue())) { - auto *IntrinsicDef = DI->getDef(); - auto DefName = IntrinsicDef->getName(); - assert(DefName.starts_with("int_") && "invalid intrinsic name"); - // Remove the int_ from intrinsic name. 
- Intrinsic = DefName.substr(4); + { + DXILIntrinsicSelect IntrSelect; + IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("LLVMIntrinsic")); + if (IntrSelect.Intrinsic.size()) + IntrinsicSelects.emplace_back(std::move(IntrSelect)); + } + + auto IntrinsicSelectRecords = R->getValueAsListOfDefs("intrinsic_selects"); + if (IntrinsicSelectRecords.size()) { + if (IntrinsicSelects.size()) { + PrintFatalError( + R, Twine("LLVMIntrinsic and intrinsic_selects cannot be both " + "defined for DXIL operation - ") + + OpName); + } else { + for (const Record *R : IntrinsicSelectRecords) { + DXILIntrinsicSelect IntrSelect; + IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("intrinsic")); + auto Args = R->getValueAsListOfDefs("args"); + for (const Record *Arg : Args) { + bool IsI8 = Arg->getValueAsBit("is_i8"); + bool IsI32 = Arg->getValueAsBit("is_i32"); + int Index = Arg->getValueAsInt("index"); + const Record *ValueRec = Arg->getValueAsOptionalDef("value"); + + DXILArgSelect ArgSelect; + if (IsI8) { + if (!ValueRec) { + PrintFatalError(R, Twine("'value' must be defined for i8 " + "ArgSelect for DXIL operation - ") + + OpName); + } + ArgSelect.Type = DXILArgSelect::Type::I8; + ArgSelect.Value = ValueRec->getValueAsInt("value"); + } else if (IsI32) { + if (!ValueRec) { + PrintFatalError(R, Twine("'value' must be defined for i32 " + "ArgSelect for DXIL operation - ") + + OpName); + } + ArgSelect.Type = DXILArgSelect::Type::I32; + ArgSelect.Value = ValueRec->getValueAsInt("value"); + } else { + if (Index < 0) { + PrintFatalError( + R, Twine("Index in ArgSelect must be equal to or " + "greater than 0 for DXIL operation - ") + + OpName); + } + ArgSelect.Type = DXILArgSelect::Type::Index; + ArgSelect.Value = Index; + } + + IntrSelect.Args.emplace_back(std::move(ArgSelect)); + } + IntrinsicSelects.emplace_back(std::move(IntrSelect)); + } } } } @@ -377,10 +454,29 @@ static void emitDXILIntrinsicMap(ArrayRef Ops, OS << "#ifdef DXIL_OP_INTRINSIC\n"; OS << "\n"; for (const auto &Op : 
Ops) { - if (Op.Intrinsic.empty()) + if (Op.IntrinsicSelects.empty()) { continue; - OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName - << ", Intrinsic::" << Op.Intrinsic << ")\n"; + } + for (const DXILIntrinsicSelect &MappedIntr : Op.IntrinsicSelects) { + OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName + << ", Intrinsic::" << MappedIntr.Intrinsic; + for (const DXILArgSelect &ArgSelect : MappedIntr.Args) { + OS << ", (ArgSelect { "; + switch (ArgSelect.Type) { + case DXILArgSelect::Type::Index: + OS << "ArgSelect::Type::Index, "; + break; + case DXILArgSelect::Type::I8: + OS << "ArgSelect::Type::I8, "; + break; + case DXILArgSelect::Type::I32: + OS << "ArgSelect::Type::I32, "; + break; + } + OS << ArgSelect.Value << "})"; + } + OS << ")\n"; + } } OS << "\n"; OS << "#undef DXIL_OP_INTRINSIC\n"; From d661aea4c5668fc9b06f4b26d9fb072b1a6d7ff4 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 29 Oct 2024 10:18:32 -0700 Subject: [PATCH 323/425] [OpenMP] Add support for custom callback in AMDGPUStream (#112785) Summary: We have the ability to schedule callbacks after certain events complete. Currently we can register an arbitrary callback in CUDA, but can't in AMDGPU. I am planning on using this support to move the RPC handling to a separate thread, then using these callbacks to suspend / resume it when no kernels are running. This is a preliminary patch to keep this noise out of that one. --- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 69 ++++++++++++++-------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index f0cc0c2e4d08e54..bdb33d4f4ab27c4 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -927,6 +927,8 @@ struct AMDGPUStreamTy { AMDGPUSignalManagerTy *SignalManager; }; + using AMDGPUStreamCallbackTy = Error(void *Data); + /// The stream is composed of N stream's slots. 
The struct below represents /// the fields of each slot. Each slot has a signal and an optional action /// function. When appending an HSA asynchronous operation to the stream, one @@ -942,65 +944,82 @@ struct AMDGPUStreamTy { /// operation as input signal. AMDGPUSignalTy *Signal; - /// The action that must be performed after the operation's completion. Set + /// The actions that must be performed after the operation's completion. Set /// to nullptr when there is no action to perform. - Error (*ActionFunction)(void *); + llvm::SmallVector Callbacks; /// Space for the action's arguments. A pointer to these arguments is passed /// to the action function. Notice the space of arguments is limited. - union { + union ActionArgsTy { MemcpyArgsTy MemcpyArgs; ReleaseBufferArgsTy ReleaseBufferArgs; ReleaseSignalArgsTy ReleaseSignalArgs; - } ActionArgs; + void *CallbackArgs; + }; + + llvm::SmallVector ActionArgs; /// Create an empty slot. - StreamSlotTy() : Signal(nullptr), ActionFunction(nullptr) {} + StreamSlotTy() : Signal(nullptr), Callbacks({}), ActionArgs({}) {} /// Schedule a host memory copy action on the slot. Error schedHostMemoryCopy(void *Dst, const void *Src, size_t Size) { - ActionFunction = memcpyAction; - ActionArgs.MemcpyArgs = MemcpyArgsTy{Dst, Src, Size}; + Callbacks.emplace_back(memcpyAction); + ActionArgs.emplace_back().MemcpyArgs = MemcpyArgsTy{Dst, Src, Size}; return Plugin::success(); } /// Schedule a release buffer action on the slot. Error schedReleaseBuffer(void *Buffer, AMDGPUMemoryManagerTy &Manager) { - ActionFunction = releaseBufferAction; - ActionArgs.ReleaseBufferArgs = ReleaseBufferArgsTy{Buffer, &Manager}; + Callbacks.emplace_back(releaseBufferAction); + ActionArgs.emplace_back().ReleaseBufferArgs = + ReleaseBufferArgsTy{Buffer, &Manager}; return Plugin::success(); } /// Schedule a signal release action on the slot. 
Error schedReleaseSignal(AMDGPUSignalTy *SignalToRelease, AMDGPUSignalManagerTy *SignalManager) { - ActionFunction = releaseSignalAction; - ActionArgs.ReleaseSignalArgs = + Callbacks.emplace_back(releaseSignalAction); + ActionArgs.emplace_back().ReleaseSignalArgs = ReleaseSignalArgsTy{SignalToRelease, SignalManager}; return Plugin::success(); } + /// Register a callback to be called on completion + Error schedCallback(AMDGPUStreamCallbackTy *Func, void *Data) { + Callbacks.emplace_back(Func); + ActionArgs.emplace_back().CallbackArgs = Data; + + return Plugin::success(); + } + // Perform the action if needed. Error performAction() { - if (!ActionFunction) + if (Callbacks.empty()) return Plugin::success(); - // Perform the action. - if (ActionFunction == memcpyAction) { - if (auto Err = memcpyAction(&ActionArgs)) - return Err; - } else if (ActionFunction == releaseBufferAction) { - if (auto Err = releaseBufferAction(&ActionArgs)) - return Err; - } else if (ActionFunction == releaseSignalAction) { - if (auto Err = releaseSignalAction(&ActionArgs)) - return Err; - } else { - return Plugin::error("Unknown action function!"); + assert(Callbacks.size() == ActionArgs.size() && "Size mismatch"); + for (auto [Callback, ActionArg] : llvm::zip(Callbacks, ActionArgs)) { + // Perform the action. + if (Callback == memcpyAction) { + if (auto Err = memcpyAction(&ActionArg)) + return Err; + } else if (Callback == releaseBufferAction) { + if (auto Err = releaseBufferAction(&ActionArg)) + return Err; + } else if (Callback == releaseSignalAction) { + if (auto Err = releaseSignalAction(&ActionArg)) + return Err; + } else if (Callback) { + if (auto Err = Callback(ActionArg.CallbackArgs)) + return Err; + } } // Invalidate the action.
- ActionFunction = nullptr; + Callbacks.clear(); + ActionArgs.clear(); return Plugin::success(); } From 4e1b9d34f922d3b8b04a65f29681cd95dc9ce75f Mon Sep 17 00:00:00 2001 From: Afanasyev Ivan Date: Wed, 30 Oct 2024 00:26:15 +0700 Subject: [PATCH 324/425] [mir-strip-debug] Fix debug location info strip for bundled instructions (#113676) Fix bug that `mir-strip-debug` pass does not remove debug location from bundled instructions. Problem arises during testing that debug info does not affect optimization passes output (`llvm-lit` with ` -Dllc="llc -debugify-and-strip-all-safe"`), when pass operates on MIR with bundled instructions + memory operands. Let mir test check looks like: ``` CHECK-NEXT: BUNDLE { CHECK-NEXT: $r3 = LD $r1, $r2 :: (load (s64) from %ir.a, !tbaa !2) CHECK-NEXT: } ``` So as `mir-strip-debug` pass does not process bundled instructions, running `llc -debugify-and-strip-all-safe` on the test will produce the following output: ``` BUNDLE { $r3 = LD $r1, $r2, debug-location !DILocation(line: 3, column: 1, scope: <0x608cb2b99b10>) :: (load (s64) from %ir.a, !tbaa !2) } ``` And test will fail, but it shouldn't. Seems like the root cause is that `mir-strip-debug` pass should remove debug location from bundled instructions. 
--- llvm/lib/CodeGen/MachineStripDebug.cpp | 2 +- .../CodeGen/Generic/MIRStripDebug/bundles.mir | 63 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Generic/MIRStripDebug/bundles.mir diff --git a/llvm/lib/CodeGen/MachineStripDebug.cpp b/llvm/lib/CodeGen/MachineStripDebug.cpp index 6128248a028e3e0..ea291f64bff4321 100644 --- a/llvm/lib/CodeGen/MachineStripDebug.cpp +++ b/llvm/lib/CodeGen/MachineStripDebug.cpp @@ -50,7 +50,7 @@ struct StripDebugMachineModule : public ModulePass { continue; MachineFunction &MF = *MaybeMF; for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { + for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) { if (MI.isDebugInstr()) { // FIXME: We should remove all of them. However, AArch64 emits an // invalid `DBG_VALUE $lr` with only one operand instead of diff --git a/llvm/test/CodeGen/Generic/MIRStripDebug/bundles.mir b/llvm/test/CodeGen/Generic/MIRStripDebug/bundles.mir new file mode 100644 index 000000000000000..111c886f585cf65 --- /dev/null +++ b/llvm/test/CodeGen/Generic/MIRStripDebug/bundles.mir @@ -0,0 +1,63 @@ +# RUN: llc -run-pass=mir-strip-debug -o - %s | FileCheck %s +# RUN: llc -run-pass=mir-strip-debug,mir-debugify,mir-strip-debug -o - %s | FileCheck %s + +--- | + source_filename = "loc-only.ll" + + define i32 @test(i32 %a, i32 %b) !dbg !6 { + %add = add i32 %a, 2, !dbg !12 + call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12 + %sub = sub i32 %add, %b, !dbg !13 + call void @llvm.dbg.value(metadata i32 %sub, metadata !11, metadata !DIExpression()), !dbg !13 + ret i32 %sub, !dbg !14 + } + + declare void @llvm.dbg.value(metadata, metadata, metadata) + + !llvm.dbg.cu = !{!0} + ; CHECK-NOT: !llvm.dbg.cu + !llvm.debugify = !{!3, !4} + ; CHECK-NOT: !llvm.debugify + !llvm.module.flags = !{!5} + ; CHECK-NOT: !llvm.module.flags + + ; CHECK-NOT: !DI + !0 = distinct 
!DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) + !1 = !DIFile(filename: "", directory: "/") + !2 = !{} + !3 = !{i32 3} + !4 = !{i32 2} + !5 = !{i32 2, !"Debug Info Version", i32 3} + !6 = distinct !DISubprogram(name: "test", linkageName: "test", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8) + !7 = !DISubroutineType(types: !2) + !8 = !{!9, !11} + !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10) + !10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) + !11 = !DILocalVariable(name: "2", scope: !6, file: !1, line: 2, type: !10) + !12 = !DILocation(line: 1, column: 1, scope: !6) + !13 = !DILocation(line: 2, column: 1, scope: !6) + !14 = !DILocation(line: 3, column: 1, scope: !6) + +... +--- +name: test +body: | + bb.1 (%ir-block.0): + %0:_(s32) = G_IMPLICIT_DEF + %1:_(s32) = G_IMPLICIT_DEF + BUNDLE { + %2:_(s32) = G_CONSTANT i32 2, debug-location !DILocation(line: 0, scope: !6) + %3:_(s32) = G_ADD %0, %1, debug-location !12 + } + + ; CHECK-LABEL: body: + ; CHECK-NOT: debug-location + ; CHECK-NOT: !DI + ; CHECK-NEXT: bb + ; CHECK-NEXT: %0:_(s32) = G_IMPLICIT_DEF{{$}} + ; CHECK-NEXT: %1:_(s32) = G_IMPLICIT_DEF{{$}} + ; CHECK-NEXT: BUNDLE { + ; CHECK-NEXT: %2:_(s32) = G_CONSTANT i32 2{{$}} + ; CHECK-NEXT: %3:_(s32) = G_ADD %0, %1{{$}} + ; CHECK-NEXT: } +... From b510cdb895b9188e5819c4c85a6dab22a4d14385 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Tue, 29 Oct 2024 10:29:39 -0700 Subject: [PATCH 325/425] [ADT] Add TrieRawHashMap (#69528) Implement TrieRawHashMap can be used to store object with its associated hash. User needs to supply a strong hashing function to guarantee the uniqueness of the hash of the objects to be inserted. A hash collision is not supported and will lead to error or failed to insert. 
TrieRawHashMap is thread-safe and lock-free and can be used as foundation data structure to implement a content addressable storage. TrieRawHashMap owns the data stored in it and is designed to be: * Fast to lookup. * Fast to "insert" if the data has already been inserted. * Can be used without lock and doesn't require any knowledge of the participating threads or extra coordination between threads. It is not currently designed to be used to insert unique new data with high contention, due to the limitation on the memory allocator. --- .../include/llvm/ADT/TrieHashIndexGenerator.h | 122 +++++ llvm/include/llvm/ADT/TrieRawHashMap.h | 377 +++++++++++++ llvm/lib/Support/CMakeLists.txt | 1 + llvm/lib/Support/TrieRawHashMap.cpp | 515 ++++++++++++++++++ llvm/unittests/ADT/CMakeLists.txt | 1 + llvm/unittests/ADT/TrieRawHashMapTest.cpp | 346 ++++++++++++ 6 files changed, 1362 insertions(+) create mode 100644 llvm/include/llvm/ADT/TrieHashIndexGenerator.h create mode 100644 llvm/include/llvm/ADT/TrieRawHashMap.h create mode 100644 llvm/lib/Support/TrieRawHashMap.cpp create mode 100644 llvm/unittests/ADT/TrieRawHashMapTest.cpp diff --git a/llvm/include/llvm/ADT/TrieHashIndexGenerator.h b/llvm/include/llvm/ADT/TrieHashIndexGenerator.h new file mode 100644 index 000000000000000..6f7e53b6b11b539 --- /dev/null +++ b/llvm/include/llvm/ADT/TrieHashIndexGenerator.h @@ -0,0 +1,122 @@ +//===- TrieHashIndexGenerator.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_TRIEHASHINDEXGENERATOR_H +#define LLVM_ADT_TRIEHASHINDEXGENERATOR_H + +#include "llvm/ADT/ArrayRef.h" +#include + +namespace llvm { + +/// The utility class that helps computing the index of the object inside trie +/// from its hash. The generator can be configured with the number of bits +/// used for each level of trie structure with \c NumRootBits and \c +/// NumSubtrieBits. +/// For example, try computing indexes for a 16-bit hash 0x1234 with 8-bit root +/// and 4-bit sub-trie: +/// +/// IndexGenerator IndexGen{8, 4, Hash}; +/// size_t index1 = IndexGen.next(); // index 18 in root node. +/// size_t index2 = IndexGen.next(); // index 3 in sub-trie level 1. +/// size_t index3 = IndexGen.next(); // index 4 in sub-trie level 2. +/// +/// This is used by different trie implementation to figure out where to +/// insert/find the object in the data structure. +struct TrieHashIndexGenerator { + size_t NumRootBits; + size_t NumSubtrieBits; + ArrayRef Bytes; + std::optional StartBit = std::nullopt; + + // Get the number of bits used to generate current index. + size_t getNumBits() const { + assert(StartBit); + size_t TotalNumBits = Bytes.size() * 8; + assert(*StartBit <= TotalNumBits); + return std::min(*StartBit ? NumSubtrieBits : NumRootBits, + TotalNumBits - *StartBit); + } + + // Get the index of the object in the next level of trie. + size_t next() { + if (!StartBit) { + // Compute index for root when StartBit is not set. + StartBit = 0; + return getIndex(Bytes, *StartBit, NumRootBits); + } + if (*StartBit < Bytes.size() * 8) { + // Compute index for sub-trie. + *StartBit += *StartBit ? NumSubtrieBits : NumRootBits; + assert((*StartBit - NumRootBits) % NumSubtrieBits == 0); + return getIndex(Bytes, *StartBit, NumSubtrieBits); + } + // All the bits are consumed.
+ return end(); + } + + // Provide a hint to speed up the index generation by providing the + // information of the hash in current level. For example, if the object is + // known to have \c Index on a level that already consumes first n \c Bits of + // the hash, it can start index generation from this level by calling \c hint + // function. + size_t hint(unsigned Index, unsigned Bit) { + assert(Bit < Bytes.size() * 8); + assert(Bit == 0 || (Bit - NumRootBits) % NumSubtrieBits == 0); + StartBit = Bit; + return Index; + } + + // Utility function for looking up the index in the trie for an object that + // has colliding hash bits in the front as the hash of the object that is + // currently being computed. + size_t getCollidingBits(ArrayRef CollidingBits) const { + assert(StartBit); + return getIndex(CollidingBits, *StartBit, NumSubtrieBits); + } + + size_t end() const { return SIZE_MAX; } + + // Compute the index for the object from its hash, current start bits, and + // the number of bits used for current level. + static size_t getIndex(ArrayRef Bytes, size_t StartBit, + size_t NumBits) { + assert(StartBit < Bytes.size() * 8); + // Drop all the bits before StartBit. + Bytes = Bytes.drop_front(StartBit / 8u); + StartBit %= 8u; + size_t Index = 0; + // Compute the index using the bits in range [StartBit, StartBit + NumBits), + // note the range can spread across few `uint8_t` in the array. 
+ for (uint8_t Byte : Bytes) { + size_t ByteStart = 0, ByteEnd = 8; + if (StartBit) { + ByteStart = StartBit; + Byte &= (1u << (8 - StartBit)) - 1u; + StartBit = 0; + } + size_t CurrentNumBits = ByteEnd - ByteStart; + if (CurrentNumBits > NumBits) { + Byte >>= CurrentNumBits - NumBits; + CurrentNumBits = NumBits; + } + Index <<= CurrentNumBits; + Index |= Byte & ((1u << CurrentNumBits) - 1u); + + assert(NumBits >= CurrentNumBits); + NumBits -= CurrentNumBits; + if (!NumBits) + break; + } + return Index; + } +}; + +} // namespace llvm + +#endif // LLVM_ADT_TRIEHASHINDEXGENERATOR_H diff --git a/llvm/include/llvm/ADT/TrieRawHashMap.h b/llvm/include/llvm/ADT/TrieRawHashMap.h new file mode 100644 index 000000000000000..5bfe5c9e6a0f495 --- /dev/null +++ b/llvm/include/llvm/ADT/TrieRawHashMap.h @@ -0,0 +1,377 @@ +//===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_TRIERAWHASHMAP_H +#define LLVM_ADT_TRIERAWHASHMAP_H + +#include "llvm/ADT/ArrayRef.h" +#include +#include + +namespace llvm { + +class raw_ostream; + +/// TrieRawHashMap - is a lock-free thread-safe trie that can be used to +/// store/index data based on a hash value. It can be customized to work with +/// any hash algorithm or store any data. +/// +/// Data structure: +/// Data node stored in the Trie contains both hash and data: +/// struct { +/// HashT Hash; +/// DataT Data; +/// }; +/// +/// Data is stored/indexed via a prefix tree, where each node in the tree can be +/// either the root, a sub-trie or a data node.
Assuming a 4-bit hash and two +/// data objects {0001, A} and {0100, B}, it can be stored in a trie +/// (assuming Root has 2 bits, SubTrie has 1 bit): +/// +--------+ +/// |Root[00]| -> {0001, A} +/// | [01]| -> {0100, B} +/// | [10]| (empty) +/// | [11]| (empty) +/// +--------+ +/// +/// Inserting a new object {0010, C} will result in: +/// +--------+ +----------+ +/// |Root[00]| -> |SubTrie[0]| -> {0001, A} +/// | | | [1]| -> {0010, C} +/// | | +----------+ +/// | [01]| -> {0100, B} +/// | [10]| (empty) +/// | [11]| (empty) +/// +--------+ +/// Note object A is sunk down to a sub-trie during the insertion. All the +/// nodes are inserted through compare-exchange to ensure thread-safe and +/// lock-free. +/// +/// To find an object in the trie, walk the tree with prefix of the hash until +/// the data node is found. Then the hash is compared with the hash stored in +/// the data node to see if it is the same object. +/// +/// Hash collision is not allowed so it is recommended to use trie with a +/// "strong" hashing algorithm. A well-distributed hash can also result in +/// better performance and memory usage. +/// +/// It currently does not support iteration and deletion. + +/// Base class for a lock-free thread-safe hash-mapped trie.
+class ThreadSafeTrieRawHashMapBase { +public: + static constexpr size_t TrieContentBaseSize = 4; + static constexpr size_t DefaultNumRootBits = 6; + static constexpr size_t DefaultNumSubtrieBits = 4; + +private: + template struct AllocValueType { + char Base[TrieContentBaseSize]; + std::aligned_union_t Content; + }; + +protected: + template + static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType); + + template + static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType); + + template + static constexpr size_t DefaultContentOffset = + offsetof(AllocValueType, Content); + +public: + static void *operator new(size_t Size) { return ::operator new(Size); } + void operator delete(void *Ptr) { ::operator delete(Ptr); } + + LLVM_DUMP_METHOD void dump() const; + void print(raw_ostream &OS) const; + +protected: + /// Result of a lookup. Suitable for an insertion hint. Maybe could be + /// expanded into an iterator of sorts, but likely not useful (visiting + /// everything in the trie should probably be done some way other than + /// through an iterator pattern). + class PointerBase { + protected: + void *get() const { return I == -2u ? P : nullptr; } + + public: + PointerBase() noexcept = default; + + private: + friend class ThreadSafeTrieRawHashMapBase; + explicit PointerBase(void *Content) : P(Content), I(-2u) {} + PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {} + + bool isHint() const { return I != -1u && I != -2u; } + + void *P = nullptr; + unsigned I = -1u; + unsigned B = 0; + }; + + /// Find the stored content with hash. + PointerBase find(ArrayRef Hash) const; + + /// Insert and return the stored content. 
+ PointerBase + insert(PointerBase Hint, ArrayRef Hash, + function_ref Hash)> + Constructor); + + ThreadSafeTrieRawHashMapBase() = delete; + + ThreadSafeTrieRawHashMapBase( + size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset, + std::optional NumRootBits = std::nullopt, + std::optional NumSubtrieBits = std::nullopt); + + /// Destructor, which asserts if there's anything to do. Subclasses should + /// call \a destroyImpl(). + /// + /// \pre \a destroyImpl() was already called. + ~ThreadSafeTrieRawHashMapBase(); + void destroyImpl(function_ref Destructor); + + ThreadSafeTrieRawHashMapBase(ThreadSafeTrieRawHashMapBase &&RHS); + + // Move assignment is not supported as it is not thread-safe. + ThreadSafeTrieRawHashMapBase & + operator=(ThreadSafeTrieRawHashMapBase &&RHS) = delete; + + // No copy. + ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &) = delete; + ThreadSafeTrieRawHashMapBase & + operator=(const ThreadSafeTrieRawHashMapBase &) = delete; + + // Debug functions. Implementation details and not guaranteed to be + // thread-safe. + PointerBase getRoot() const; + unsigned getStartBit(PointerBase P) const; + unsigned getNumBits(PointerBase P) const; + unsigned getNumSlotUsed(PointerBase P) const; + std::string getTriePrefixAsString(PointerBase P) const; + unsigned getNumTries() const; + // Visit next trie in the allocation chain. + PointerBase getNextTrie(PointerBase P) const; + +private: + friend class TrieRawHashMapTestHelper; + const unsigned short ContentAllocSize; + const unsigned short ContentAllocAlign; + const unsigned short ContentOffset; + unsigned short NumRootBits; + unsigned short NumSubtrieBits; + class ImplType; + // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in + // destroyImpl. + std::atomic ImplPtr; + ImplType &getOrCreateImpl(); + ImplType *getImpl() const; +}; + +/// Lock-free thread-safe hash-mapped trie. 
+template +class ThreadSafeTrieRawHashMap : public ThreadSafeTrieRawHashMapBase { +public: + using HashT = std::array; + + class LazyValueConstructor; + struct value_type { + const HashT Hash; + T Data; + + value_type(value_type &&) = default; + value_type(const value_type &) = default; + + value_type(ArrayRef Hash, const T &Data) + : Hash(makeHash(Hash)), Data(Data) {} + value_type(ArrayRef Hash, T &&Data) + : Hash(makeHash(Hash)), Data(std::move(Data)) {} + + private: + friend class LazyValueConstructor; + + struct EmplaceTag {}; + template + value_type(ArrayRef Hash, EmplaceTag, ArgsT &&...Args) + : Hash(makeHash(Hash)), Data(std::forward(Args)...) {} + + static HashT makeHash(ArrayRef HashRef) { + HashT Hash; + std::copy(HashRef.begin(), HashRef.end(), Hash.data()); + return Hash; + } + }; + + using ThreadSafeTrieRawHashMapBase::operator delete; + using HashType = HashT; + + using ThreadSafeTrieRawHashMapBase::dump; + using ThreadSafeTrieRawHashMapBase::print; + +private: + template class PointerImpl : PointerBase { + friend class ThreadSafeTrieRawHashMap; + + ValueT *get() const { + return reinterpret_cast(PointerBase::get()); + } + + public: + ValueT &operator*() const { + assert(get()); + return *get(); + } + ValueT *operator->() const { + assert(get()); + return get(); + } + explicit operator bool() const { return get(); } + + PointerImpl() = default; + + protected: + PointerImpl(PointerBase Result) : PointerBase(Result) {} + }; + +public: + class pointer; + class const_pointer; + class pointer : public PointerImpl { + friend class ThreadSafeTrieRawHashMap; + friend class const_pointer; + + public: + pointer() = default; + + private: + pointer(PointerBase Result) : pointer::PointerImpl(Result) {} + }; + + class const_pointer : public PointerImpl { + friend class ThreadSafeTrieRawHashMap; + + public: + const_pointer() = default; + const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {} + + private: + const_pointer(PointerBase Result) : 
const_pointer::PointerImpl(Result) {} + }; + + class LazyValueConstructor { + public: + value_type &operator()(T &&RHS) { + assert(Mem && "Constructor already called, or moved away"); + return assign(::new (Mem) value_type(Hash, std::move(RHS))); + } + value_type &operator()(const T &RHS) { + assert(Mem && "Constructor already called, or moved away"); + return assign(::new (Mem) value_type(Hash, RHS)); + } + template value_type &emplace(ArgsT &&...Args) { + assert(Mem && "Constructor already called, or moved away"); + return assign(::new (Mem) + value_type(Hash, typename value_type::EmplaceTag{}, + std::forward(Args)...)); + } + + LazyValueConstructor(LazyValueConstructor &&RHS) + : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) { + RHS.Mem = nullptr; // Moved away, cannot call. + } + ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); } + + private: + value_type &assign(value_type *V) { + Mem = nullptr; + Result = V; + return *V; + } + friend class ThreadSafeTrieRawHashMap; + LazyValueConstructor() = delete; + LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef Hash) + : Mem(Mem), Result(Result), Hash(Hash) { + assert(Hash.size() == sizeof(HashT) && "Invalid hash"); + assert(Mem && "Invalid memory for construction"); + } + void *Mem; + value_type *&Result; + ArrayRef Hash; + }; + + /// Insert with a hint. Default-constructed hint will work, but it's + /// recommended to start with a lookup to avoid overhead in object creation + /// if it already exists. 
+ pointer insertLazy(const_pointer Hint, ArrayRef Hash, + function_ref OnConstruct) { + return pointer(ThreadSafeTrieRawHashMapBase::insert( + Hint, Hash, [&](void *Mem, ArrayRef Hash) { + value_type *Result = nullptr; + OnConstruct(LazyValueConstructor(Mem, Result, Hash)); + return Result->Hash.data(); + })); + } + + pointer insertLazy(ArrayRef Hash, + function_ref OnConstruct) { + return insertLazy(const_pointer(), Hash, OnConstruct); + } + + pointer insert(const_pointer Hint, value_type &&HashedData) { + return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) { + C(std::move(HashedData.Data)); + }); + } + + pointer insert(const_pointer Hint, const value_type &HashedData) { + return insertLazy(Hint, HashedData.Hash, + [&](LazyValueConstructor C) { C(HashedData.Data); }); + } + + pointer find(ArrayRef Hash) { + assert(Hash.size() == std::tuple_size::value); + return ThreadSafeTrieRawHashMapBase::find(Hash); + } + + const_pointer find(ArrayRef Hash) const { + assert(Hash.size() == std::tuple_size::value); + return ThreadSafeTrieRawHashMapBase::find(Hash); + } + + ThreadSafeTrieRawHashMap(std::optional NumRootBits = std::nullopt, + std::optional NumSubtrieBits = std::nullopt) + : ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize, + DefaultContentAllocAlign, + DefaultContentOffset, + NumRootBits, NumSubtrieBits) {} + + ~ThreadSafeTrieRawHashMap() { + if constexpr (std::is_trivially_destructible::value) + this->destroyImpl(nullptr); + else + this->destroyImpl( + [](void *P) { static_cast(P)->~value_type(); }); + } + + // Move constructor okay. + ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap &&) = default; + + // No move assignment or any copy. 
+ ThreadSafeTrieRawHashMap &operator=(ThreadSafeTrieRawHashMap &&) = delete; + ThreadSafeTrieRawHashMap(const ThreadSafeTrieRawHashMap &) = delete; + ThreadSafeTrieRawHashMap & + operator=(const ThreadSafeTrieRawHashMap &) = delete; +}; + +} // namespace llvm + +#endif // LLVM_ADT_TRIERAWHASHMAP_H diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index 531bdeaca12614f..2ecaea4b02bf618 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -256,6 +256,7 @@ add_llvm_component_library(LLVMSupport TimeProfiler.cpp Timer.cpp ToolOutputFile.cpp + TrieRawHashMap.cpp Twine.cpp TypeSize.cpp Unicode.cpp diff --git a/llvm/lib/Support/TrieRawHashMap.cpp b/llvm/lib/Support/TrieRawHashMap.cpp new file mode 100644 index 000000000000000..9eeac0bbc5c2c3a --- /dev/null +++ b/llvm/lib/Support/TrieRawHashMap.cpp @@ -0,0 +1,515 @@ +//===- TrieRawHashMap.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/TrieRawHashMap.h" +#include "llvm/ADT/LazyAtomicPointer.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/TrieHashIndexGenerator.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ThreadSafeAllocator.h" +#include "llvm/Support/TrailingObjects.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +namespace { +struct TrieNode { + const bool IsSubtrie = false; + + TrieNode(bool IsSubtrie) : IsSubtrie(IsSubtrie) {} + + static void *operator new(size_t Size) { return ::operator new(Size); } + void operator delete(void *Ptr) { ::operator delete(Ptr); } +}; + +struct TrieContent final : public TrieNode { + const uint8_t ContentOffset; + const uint8_t HashSize; + const uint8_t HashOffset; + + void *getValuePointer() const { + auto *Content = reinterpret_cast(this) + ContentOffset; + return const_cast(Content); + } + + ArrayRef getHash() const { + auto *Begin = reinterpret_cast(this) + HashOffset; + return ArrayRef(Begin, Begin + HashSize); + } + + TrieContent(size_t ContentOffset, size_t HashSize, size_t HashOffset) + : TrieNode(/*IsSubtrie=*/false), ContentOffset(ContentOffset), + HashSize(HashSize), HashOffset(HashOffset) {} + + static bool classof(const TrieNode *TN) { return !TN->IsSubtrie; } +}; + +static_assert(sizeof(TrieContent) == + ThreadSafeTrieRawHashMapBase::TrieContentBaseSize, + "Check header assumption!"); + +class TrieSubtrie final + : public TrieNode, + private TrailingObjects> { +public: + using Slot = LazyAtomicPointer; + + Slot &get(size_t I) { return getTrailingObjects()[I]; } + TrieNode *load(size_t I) { return get(I).load(); } + + unsigned size() const { return Size; } + + TrieSubtrie * + sink(size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI, + 
function_ref)> Saver); + + static std::unique_ptr create(size_t StartBit, size_t NumBits); + + explicit TrieSubtrie(size_t StartBit, size_t NumBits); + + static bool classof(const TrieNode *TN) { return TN->IsSubtrie; } + + static constexpr size_t sizeToAlloc(unsigned NumBits) { + assert(NumBits < 20 && "Tries should have fewer than ~1M slots"); + size_t Count = 1u << NumBits; + return totalSizeToAlloc>(Count); + } + +private: + // FIXME: Use a bitset to speed up access: + // + // std::array, NumSlots/64> IsSet; + // + // This will avoid needing to visit sparsely filled slots in + // \a ThreadSafeTrieRawHashMapBase::destroyImpl() when there's a non-trivial + // destructor. + // + // It would also greatly speed up iteration, if we add that some day, and + // allow get() to return one level sooner. + // + // This would be the algorithm for updating IsSet (after updating Slots): + // + // std::atomic &Bits = IsSet[I.High]; + // const uint64_t NewBit = 1ULL << I.Low; + // uint64_t Old = 0; + // while (!Bits.compare_exchange_weak(Old, Old | NewBit)) + // ; + + // For debugging. + unsigned StartBit = 0; + unsigned NumBits = 0; + unsigned Size = 0; + friend class llvm::ThreadSafeTrieRawHashMapBase; + friend class TrailingObjects; + +public: + /// Linked list for ownership of tries. The pointer is owned by TrieSubtrie. 
+ std::atomic Next; +}; +} // end namespace + +std::unique_ptr TrieSubtrie::create(size_t StartBit, + size_t NumBits) { + void *Memory = ::operator new(sizeToAlloc(NumBits)); + TrieSubtrie *S = ::new (Memory) TrieSubtrie(StartBit, NumBits); + return std::unique_ptr(S); +} + +TrieSubtrie::TrieSubtrie(size_t StartBit, size_t NumBits) + : TrieNode(true), StartBit(StartBit), NumBits(NumBits), Size(1u << NumBits), + Next(nullptr) { + for (unsigned I = 0; I < Size; ++I) + new (&get(I)) Slot(nullptr); + + static_assert( + std::is_trivially_destructible>::value, + "Expected no work in destructor for TrieNode"); +} + +// Sink the nodes down sub-trie when the object being inserted collides with +// the index of existing object in the trie. In this case, a new sub-trie needs +// to be allocated to hold existing object. +TrieSubtrie *TrieSubtrie::sink( + size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI, + function_ref)> Saver) { + // Create a new sub-trie that points to the existing object with the new + // index for the next level. + assert(NumSubtrieBits > 0); + std::unique_ptr S = create(StartBit + NumBits, NumSubtrieBits); + + assert(NewI < Size); + S->get(NewI).store(&Content); + + // Using compare_exchange to atomically add back the new sub-trie to the trie + // in the place of the existing object. + TrieNode *ExistingNode = &Content; + assert(I < Size); + if (get(I).compare_exchange_strong(ExistingNode, S.get())) + return Saver(std::move(S)); + + // Another thread created a subtrie already. Return it and let "S" be + // destructed.
+ return cast(ExistingNode); +} + +class ThreadSafeTrieRawHashMapBase::ImplType final + : private TrailingObjects { +public: + static std::unique_ptr create(size_t StartBit, size_t NumBits) { + size_t Size = sizeof(ImplType) + TrieSubtrie::sizeToAlloc(NumBits); + void *Memory = ::operator new(Size); + ImplType *Impl = ::new (Memory) ImplType(StartBit, NumBits); + return std::unique_ptr(Impl); + } + + // Save the Subtrie into the ownership list of the trie structure in a + // thread-safe way. The ownership transfer is done by a compare_exchange on + // the pointer value inside the unique_ptr. + TrieSubtrie *save(std::unique_ptr S) { + assert(!S->Next && "Expected S to a freshly-constructed leaf"); + + TrieSubtrie *CurrentHead = nullptr; + // Add ownership of "S" to front of the list, so that Root -> S -> + // Root.Next. This works by repeatedly setting S->Next to a candidate value + // of Root.Next (initially nullptr), then setting Root.Next to S once the + // candidate matches reality. + while (!getRoot()->Next.compare_exchange_weak(CurrentHead, S.get())) + S->Next.exchange(CurrentHead); + + // Ownership transferred to subtrie successfully. Release the unique_ptr. + return S.release(); + } + + // Get the root which is the trailing object. + TrieSubtrie *getRoot() { return getTrailingObjects(); } + + static void *operator new(size_t Size) { return ::operator new(Size); } + void operator delete(void *Ptr) { ::operator delete(Ptr); } + + /// FIXME: This should take a function that allocates and constructs the + /// content lazily (taking the hash as a separate parameter), in case of + /// collision.
+ ThreadSafeAllocator ContentAlloc; + +private: + friend class TrailingObjects; + + ImplType(size_t StartBit, size_t NumBits) { + ::new (getRoot()) TrieSubtrie(StartBit, NumBits); + } +}; + +ThreadSafeTrieRawHashMapBase::ImplType & +ThreadSafeTrieRawHashMapBase::getOrCreateImpl() { + if (ImplType *Impl = ImplPtr.load()) + return *Impl; + + // Create a new ImplType and store it if another thread doesn't do so first. + // If another thread wins, this one is destroyed locally. + std::unique_ptr Impl = ImplType::create(0, NumRootBits); + ImplType *ExistingImpl = nullptr; + + // If the ownership is transferred successfully, release unique_ptr and return + // the pointer to the new ImplType. + if (ImplPtr.compare_exchange_strong(ExistingImpl, Impl.get())) + return *Impl.release(); + + // Already created, return the existing ImplType. + return *ExistingImpl; +} + +ThreadSafeTrieRawHashMapBase::PointerBase +ThreadSafeTrieRawHashMapBase::find(ArrayRef Hash) const { + assert(!Hash.empty() && "Uninitialized hash"); + + ImplType *Impl = ImplPtr.load(); + if (!Impl) + return PointerBase(); + + TrieSubtrie *S = Impl->getRoot(); + TrieHashIndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash}; + size_t Index = IndexGen.next(); + while (Index != IndexGen.end()) { + // Load the node at this index. + TrieNode *Existing = S->get(Index); + if (!Existing) + return PointerBase(S, Index, *IndexGen.StartBit); + + // Check for an exact match. + if (auto *ExistingContent = dyn_cast(Existing)) + return ExistingContent->getHash() == Hash + ?
PointerBase(ExistingContent->getValuePointer()) + : PointerBase(S, Index, *IndexGen.StartBit); + + Index = IndexGen.next(); + S = cast(Existing); + } + llvm_unreachable("failed to locate the node after consuming all hash bytes"); +} + +ThreadSafeTrieRawHashMapBase::PointerBase ThreadSafeTrieRawHashMapBase::insert( + PointerBase Hint, ArrayRef Hash, + function_ref Hash)> + Constructor) { + assert(!Hash.empty() && "Uninitialized hash"); + + ImplType &Impl = getOrCreateImpl(); + TrieSubtrie *S = Impl.getRoot(); + TrieHashIndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash}; + size_t Index; + if (Hint.isHint()) { + S = static_cast(Hint.P); + Index = IndexGen.hint(Hint.I, Hint.B); + } else { + Index = IndexGen.next(); + } + + while (Index != IndexGen.end()) { + // Load the node from the slot, allocating and calling the constructor if + // the slot is empty. + bool Generated = false; + TrieNode &Existing = S->get(Index).loadOrGenerate([&]() { + Generated = true; + + // Construct the value itself at the tail. + uint8_t *Memory = reinterpret_cast( + Impl.ContentAlloc.Allocate(ContentAllocSize, ContentAllocAlign)); + const uint8_t *HashStorage = Constructor(Memory + ContentOffset, Hash); + + // Construct the TrieContent header, passing in the offset to the hash. + TrieContent *Content = ::new (Memory) + TrieContent(ContentOffset, Hash.size(), HashStorage - Memory); + assert(Hash == Content->getHash() && "Hash not properly initialized"); + return Content; + }); + // If we just generated it, return it! + if (Generated) + return PointerBase(cast(Existing).getValuePointer()); + + if (auto *ST = dyn_cast(&Existing)) { + S = ST; + Index = IndexGen.next(); + continue; + } + + // Return the existing content if it's an exact match! + auto &ExistingContent = cast(Existing); + if (ExistingContent.getHash() == Hash) + return PointerBase(ExistingContent.getValuePointer()); + + // Sink the existing content as long as the indexes match. 
+ size_t NextIndex = IndexGen.next(); + while (NextIndex != IndexGen.end()) { + size_t NewIndexForExistingContent = + IndexGen.getCollidingBits(ExistingContent.getHash()); + S = S->sink(Index, ExistingContent, IndexGen.getNumBits(), + NewIndexForExistingContent, + [&Impl](std::unique_ptr S) { + return Impl.save(std::move(S)); + }); + Index = NextIndex; + + // Found the difference. + if (NextIndex != NewIndexForExistingContent) + break; + + NextIndex = IndexGen.next(); + } + } + llvm_unreachable("failed to insert the node after consuming all hash bytes"); +} + +ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase( + size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset, + std::optional NumRootBits, std::optional NumSubtrieBits) + : ContentAllocSize(ContentAllocSize), ContentAllocAlign(ContentAllocAlign), + ContentOffset(ContentOffset), + NumRootBits(NumRootBits ? *NumRootBits : DefaultNumRootBits), + NumSubtrieBits(NumSubtrieBits ? *NumSubtrieBits : DefaultNumSubtrieBits), + ImplPtr(nullptr) { + // Assertion checks for reasonable configuration. The settings below are not + // hard limits on most platforms, but a reasonable configuration should fall + // within those limits. + assert((!NumRootBits || *NumRootBits < 20) && + "Root should have fewer than ~1M slots"); + assert((!NumSubtrieBits || *NumSubtrieBits < 10) && + "Subtries should have fewer than ~1K slots"); +} + +ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase( + ThreadSafeTrieRawHashMapBase &&RHS) + : ContentAllocSize(RHS.ContentAllocSize), + ContentAllocAlign(RHS.ContentAllocAlign), + ContentOffset(RHS.ContentOffset), NumRootBits(RHS.NumRootBits), + NumSubtrieBits(RHS.NumSubtrieBits) { + // Steal the root from RHS. 
+ ImplPtr = RHS.ImplPtr.exchange(nullptr); +} + +ThreadSafeTrieRawHashMapBase::~ThreadSafeTrieRawHashMapBase() { + assert(!ImplPtr.load() && "Expected subclass to call destroyImpl()"); +} + +void ThreadSafeTrieRawHashMapBase::destroyImpl( + function_ref Destructor) { + std::unique_ptr Impl(ImplPtr.exchange(nullptr)); + if (!Impl) + return; + + // Destroy content nodes throughout trie. Avoid destroying any subtries since + // we need TrieNode::classof() to find the content nodes. + // + // FIXME: Once we have bitsets (see FIXME in TrieSubtrie class), use them + // facilitate sparse iteration here. + if (Destructor) + for (TrieSubtrie *Trie = Impl->getRoot(); Trie; Trie = Trie->Next.load()) + for (unsigned I = 0; I < Trie->size(); ++I) + if (auto *Content = dyn_cast_or_null(Trie->load(I))) + Destructor(Content->getValuePointer()); + + // Destroy the subtries. Incidentally, this destroys them in the reverse order + // of saving. + TrieSubtrie *Trie = Impl->getRoot()->Next; + while (Trie) { + TrieSubtrie *Next = Trie->Next.exchange(nullptr); + delete Trie; + Trie = Next; + } +} + +ThreadSafeTrieRawHashMapBase::PointerBase +ThreadSafeTrieRawHashMapBase::getRoot() const { + ImplType *Impl = ImplPtr.load(); + if (!Impl) + return PointerBase(); + return PointerBase(Impl->getRoot()); +} + +unsigned ThreadSafeTrieRawHashMapBase::getStartBit( + ThreadSafeTrieRawHashMapBase::PointerBase P) const { + assert(!P.isHint() && "Not a valid trie"); + if (!P.P) + return 0; + if (auto *S = dyn_cast((TrieNode *)P.P)) + return S->StartBit; + return 0; +} + +unsigned ThreadSafeTrieRawHashMapBase::getNumBits( + ThreadSafeTrieRawHashMapBase::PointerBase P) const { + assert(!P.isHint() && "Not a valid trie"); + if (!P.P) + return 0; + if (auto *S = dyn_cast((TrieNode *)P.P)) + return S->NumBits; + return 0; +} + +unsigned ThreadSafeTrieRawHashMapBase::getNumSlotUsed( + ThreadSafeTrieRawHashMapBase::PointerBase P) const { + assert(!P.isHint() && "Not a valid trie"); + if (!P.P) + return 0; + 
auto *S = dyn_cast((TrieNode *)P.P); + if (!S) + return 0; + unsigned Num = 0; + for (unsigned I = 0, E = S->size(); I < E; ++I) + if (auto *E = S->load(I)) + ++Num; + return Num; +} + +std::string ThreadSafeTrieRawHashMapBase::getTriePrefixAsString( + ThreadSafeTrieRawHashMapBase::PointerBase P) const { + assert(!P.isHint() && "Not a valid trie"); + if (!P.P) + return ""; + + auto *S = dyn_cast((TrieNode *)P.P); + if (!S || !S->IsSubtrie) + return ""; + + // Find a TrieContent node which has hash stored. Depth search following the + // first used slot until a TrieContent node is found. + TrieSubtrie *Current = S; + TrieContent *Node = nullptr; + while (Current) { + TrieSubtrie *Next = nullptr; + // Find first used slot in the trie. + for (unsigned I = 0, E = Current->size(); I < E; ++I) { + auto *S = Current->load(I); + if (!S) + continue; + + if (auto *Content = dyn_cast(S)) + Node = Content; + else if (auto *Sub = dyn_cast(S)) + Next = Sub; + break; + } + + // Found the node. + if (Node) + break; + + // Continue to the next level if the node is not found. + Current = Next; + } + + assert(Node && "malformed trie, cannot find TrieContent on leaf node"); + // The prefix for the current trie is the first `StartBit` of the content + // stored underneath this subtrie. + std::string Str; + raw_string_ostream SS(Str); + + unsigned StartFullBytes = (S->StartBit + 1) / 8 - 1; + SS << toHex(toStringRef(Node->getHash()).take_front(StartFullBytes), + /*LowerCase=*/true); + + // For the part of the prefix that doesn't fill a byte, print raw bit values. 
+ std::string Bits; + for (unsigned I = StartFullBytes * 8, E = S->StartBit; I < E; ++I) { + unsigned Index = I / 8; + unsigned Offset = 7 - I % 8; + Bits.push_back('0' + ((Node->getHash()[Index] >> Offset) & 1)); + } + + if (!Bits.empty()) + SS << "[" << Bits << "]"; + + return SS.str(); +} + +unsigned ThreadSafeTrieRawHashMapBase::getNumTries() const { + ImplType *Impl = ImplPtr.load(); + if (!Impl) + return 0; + unsigned Num = 0; + for (TrieSubtrie *Trie = Impl->getRoot(); Trie; Trie = Trie->Next.load()) + ++Num; + return Num; +} + +ThreadSafeTrieRawHashMapBase::PointerBase +ThreadSafeTrieRawHashMapBase::getNextTrie( + ThreadSafeTrieRawHashMapBase::PointerBase P) const { + assert(!P.isHint() && "Not a valid trie"); + if (!P.P) + return PointerBase(); + auto *S = dyn_cast((TrieNode *)P.P); + if (!S) + return PointerBase(); + if (auto *E = S->Next.load()) + return PointerBase(E); + return PointerBase(); +} diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt index 745e4d9fb74a4a6..b0077d5b54a3ee5 100644 --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -86,6 +86,7 @@ add_llvm_unittest(ADTTests StringSetTest.cpp StringSwitchTest.cpp TinyPtrVectorTest.cpp + TrieRawHashMapTest.cpp TwineTest.cpp TypeSwitchTest.cpp TypeTraitsTest.cpp diff --git a/llvm/unittests/ADT/TrieRawHashMapTest.cpp b/llvm/unittests/ADT/TrieRawHashMapTest.cpp new file mode 100644 index 000000000000000..c9081f547812e90 --- /dev/null +++ b/llvm/unittests/ADT/TrieRawHashMapTest.cpp @@ -0,0 +1,346 @@ +//===- TrieRawHashMapTest.cpp ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/TrieRawHashMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/SHA1.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace llvm { +class TrieRawHashMapTestHelper { +public: + TrieRawHashMapTestHelper() = default; + + void setTrie(ThreadSafeTrieRawHashMapBase *T) { Trie = T; } + + ThreadSafeTrieRawHashMapBase::PointerBase getRoot() const { + return Trie->getRoot(); + } + unsigned getStartBit(ThreadSafeTrieRawHashMapBase::PointerBase P) const { + return Trie->getStartBit(P); + } + unsigned getNumBits(ThreadSafeTrieRawHashMapBase::PointerBase P) const { + return Trie->getNumBits(P); + } + unsigned getNumSlotUsed(ThreadSafeTrieRawHashMapBase::PointerBase P) const { + return Trie->getNumSlotUsed(P); + } + unsigned getNumTries() const { return Trie->getNumTries(); } + std::string + getTriePrefixAsString(ThreadSafeTrieRawHashMapBase::PointerBase P) const { + return Trie->getTriePrefixAsString(P); + } + ThreadSafeTrieRawHashMapBase::PointerBase + getNextTrie(ThreadSafeTrieRawHashMapBase::PointerBase P) const { + return Trie->getNextTrie(P); + } + +private: + ThreadSafeTrieRawHashMapBase *Trie = nullptr; +}; +} // namespace llvm + +namespace { +template +class SimpleTrieHashMapTest : public TrieRawHashMapTestHelper, + public ::testing::Test { +public: + using NumType = DataType; + using HashType = std::array; + using TrieType = ThreadSafeTrieRawHashMap; + + TrieType &createTrie(size_t RootBits, size_t SubtrieBits) { + auto &Ret = Trie.emplace(RootBits, SubtrieBits); + TrieRawHashMapTestHelper::setTrie(&Ret); + return Ret; + } + + void destroyTrie() { Trie.reset(); } + ~SimpleTrieHashMapTest() { destroyTrie(); } + + // Use the number itself as hash to test the pathological case. 
+ static HashType hash(uint64_t Num) { + uint64_t HashN = + llvm::support::endian::byte_swap(Num, llvm::endianness::big); + HashType Hash; + memcpy(&Hash[0], &HashN, sizeof(HashType)); + return Hash; + }; + +private: + std::optional Trie; +}; + +using SmallNodeTrieTest = SimpleTrieHashMapTest; + +TEST_F(SmallNodeTrieTest, TrieAllocation) { + NumType Numbers[] = { + 0x0, std::numeric_limits::max(), 0x1, 0x2, + 0x3, std::numeric_limits::max() - 1u, + }; + + unsigned ExpectedTries[] = { + 1, // Allocate Root. + 1, // Both on the root. + 64, // 0 and 1 sinks all the way down. + 64, // no new allocation needed. + 65, // need a new node between 2 and 3. + 65 + 63, // 63 new allocation to sink two big numbers all the way. + }; + + const char *ExpectedPrefix[] = { + "", // Root. + "", // Root. + "00000000000000[0000000]", + "00000000000000[0000000]", + "00000000000000[0000001]", + "ffffffffffffff[1111111]", + }; + + // Use root and subtrie sizes of 1 so this gets sunk quite deep. + auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1); + + for (unsigned I = 0; I < 6; ++I) { + // Lookup first to exercise hint code for deep tries. + TrieType::pointer Lookup = Trie.find(hash(Numbers[I])); + EXPECT_FALSE(Lookup); + + Trie.insert(Lookup, TrieType::value_type(hash(Numbers[I]), Numbers[I])); + EXPECT_EQ(getNumTries(), ExpectedTries[I]); + EXPECT_EQ(getTriePrefixAsString(getNextTrie(getRoot())), ExpectedPrefix[I]); + } +} + +TEST_F(SmallNodeTrieTest, TrieStructure) { + NumType Numbers[] = { + // Three numbers that will nest deeply to test (1) sinking subtries and + // (2) deep, non-trivial hints. + std::numeric_limits::max(), + std::numeric_limits::max() - 2u, + std::numeric_limits::max() - 3u, + // One number to stay at the top-level. + 0x37, + }; + + // Use root and subtrie sizes of 1 so this gets sunk quite deep. + auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1); + + for (NumType N : Numbers) { + // Lookup first to exercise hint code for deep tries. 
+ TrieType::pointer Lookup = Trie.find(hash(N)); + EXPECT_FALSE(Lookup); + + Trie.insert(Lookup, TrieType::value_type(hash(N), N)); + } + for (NumType N : Numbers) { + TrieType::pointer Lookup = Trie.find(hash(N)); + EXPECT_TRUE(Lookup); + if (!Lookup) + continue; + EXPECT_EQ(hash(N), Lookup->Hash); + EXPECT_EQ(N, Lookup->Data); + + // Confirm a subsequent insertion fails to overwrite by trying to insert a + // bad value. + auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1)); + EXPECT_EQ(N, Result->Data); + } + + // Check the trie so we can confirm the structure is correct. Each subtrie + // should have 2 slots. The root's index=0 should have the content for + // 0x37 directly, and index=1 should be a linked-list of subtries, finally + // ending with content for (max-2) and (max-3). + // + // Note: This structure is not exhaustive (too expensive to update tests), + // but it does test that the dump format is somewhat readable and that the + // basic structure is correct. + // + // Note: This test requires that the trie reads bytes starting from index 0 + // of the array of uint8_t, and then reads each byte's bits from high to low. + + // Check the Trie. + // We should have allocated a total of 64 SubTries for a 64-bit hash. + ASSERT_EQ(getNumTries(), 64u); + // Check the root trie. Two slots and both are used. + ASSERT_EQ(getNumSlotUsed(getRoot()), 2u); + // Check last subtrie. + // Last allocated trie is the next node in the allocation chain. + auto LastAlloctedSubTrie = getNextTrie(getRoot()); + ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie), + "ffffffffffffff[1111110]"); + ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u); + ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u); + ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u); +} + +TEST_F(SmallNodeTrieTest, TrieStructureSmallFinalSubtrie) { + NumType Numbers[] = { + // Three numbers that will nest deeply to test (1) sinking subtries and + // (2) deep, non-trivial hints.
+ std::numeric_limits::max(), + std::numeric_limits::max() - 2u, + std::numeric_limits::max() - 3u, + // One number to stay at the top-level. + 0x37, + }; + + // Use subtrie size of 5 to avoid hitting 64 evenly, making the final subtrie + // small. + auto &Trie = createTrie(/*RootBits=*/8, /*SubtrieBits=*/5); + + for (NumType N : Numbers) { + // Lookup first to exercise hint code for deep tries. + TrieType::pointer Lookup = Trie.find(hash(N)); + EXPECT_FALSE(Lookup); + + Trie.insert(Lookup, TrieType::value_type(hash(N), N)); + } + for (NumType N : Numbers) { + TrieType::pointer Lookup = Trie.find(hash(N)); + ASSERT_TRUE(Lookup); + EXPECT_EQ(hash(N), Lookup->Hash); + EXPECT_EQ(N, Lookup->Data); + + // Confirm a subsequent insertion fails to overwrite by trying to insert a + // bad value. + auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1)); + EXPECT_EQ(N, Result->Data); + } + + // Check the trie so we can confirm the structure is correct. The root + // should have 2^8=256 slots, most subtries should have 2^5=32 slots, and the + // deepest subtrie should have 2^1=2 slots (since (64-8)mod(5)=1). + // The root's index=0 should have the content for + // 0x37 directly, and index=255 should be a linked-list of subtries, finally + // ending with content for (max-2) and (max-3). + // + // Note: This structure is not exhaustive (too expensive to update tests), + // but it does test that the dump format is somewhat readable and that the + // basic structure is correct. + // + // Note: This test requires that the trie reads bytes starting from index 0 + // of the array of uint8_t, and then reads each byte's bits from high to low. + + // Check the Trie. + // 64-bit hash = 8 + 5 * 11 + 1, so 1 root, 11 5-bit subtries and 1 last-level + // subtrie, 13 total. + ASSERT_EQ(getNumTries(), 13u); + // Check the root trie. Exactly two of its slots are used. + ASSERT_EQ(getNumSlotUsed(getRoot()), 2u); + // Check last subtrie.
+ // Last allocated trie is the next node in the allocation chain. + auto LastAlloctedSubTrie = getNextTrie(getRoot()); + ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie), + "ffffffffffffff[1111110]"); + ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u); + ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u); + ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u); +} + +TEST_F(SmallNodeTrieTest, TrieDestructionLoop) { + // Test destroying large Trie. Make sure there is no recursion that can + // overflow the stack. + + // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate. + auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1); + + // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug + // builds. + static constexpr uint64_t MaxN = 100000; + for (uint64_t N = 0; N != MaxN; ++N) { + HashType Hash = hash(N); + Trie.insert(TrieType::pointer(), TrieType::value_type(Hash, NumType{N})); + } + + // Destroy tries. If destruction is recursive and MaxN is high enough, these + // will both fail. + destroyTrie(); +} + +struct NumWithDestructorT { + uint64_t Num; + llvm::function_ref DestructorCallback; + ~NumWithDestructorT() { DestructorCallback(); } +}; + +using NodeWithDestructorTrieTest = SimpleTrieHashMapTest; + +TEST_F(NodeWithDestructorTrieTest, TrieDestructionLoop) { + // Test destroying large Trie. Make sure there is no recursion that can + // overflow the stack. + + // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate. + auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1); + + // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug + // builds. 
+ static constexpr uint64_t MaxN = 100000; + + uint64_t DestructorCalled = 0; + auto DtorCallback = [&DestructorCalled]() { ++DestructorCalled; }; + for (uint64_t N = 0; N != MaxN; ++N) { + HashType Hash = hash(N); + Trie.insert(TrieType::pointer(), + TrieType::value_type(Hash, NumType{N, DtorCallback})); + } + // Reset the count after all the temporaries get destroyed. + DestructorCalled = 0; + + // Destroy tries. If destruction is recursive and MaxN is high enough, these + // will both fail. + destroyTrie(); + + // Count the number of destructor calls during `destroyTrie()`. + ASSERT_EQ(DestructorCalled, MaxN); +} + +using NumStrNodeTrieTest = SimpleTrieHashMapTest; + +TEST_F(NumStrNodeTrieTest, TrieInsertLazy) { + for (unsigned RootBits : {2, 3, 6, 10}) { + for (unsigned SubtrieBits : {2, 3, 4}) { + auto &Trie = createTrie(RootBits, SubtrieBits); + for (int I = 0, E = 1000; I != E; ++I) { + TrieType::pointer Lookup; + HashType H = hash(I); + if (I & 1) + Lookup = Trie.find(H); + + auto insertNum = [&](uint64_t Num) { + std::string S = Twine(I).str(); + auto Hash = hash(Num); + return Trie.insertLazy( + Hash, [&](TrieType::LazyValueConstructor C) { C(std::move(S)); }); + }; + auto S1 = insertNum(I); + // The address of the Data should be the same. + EXPECT_EQ(&S1->Data, &insertNum(I)->Data); + + auto insertStr = [&](std::string S) { + int Num = std::stoi(S); + return insertNum(Num); + }; + std::string S2 = S1->Data; + // The address of the Data should be the same. + EXPECT_EQ(&S1->Data, &insertStr(S2)->Data); + } + for (int I = 0, E = 1000; I != E; ++I) { + std::string S = Twine(I).str(); + TrieType::pointer Lookup = Trie.find(hash(I)); + EXPECT_TRUE(Lookup); + if (!Lookup) + continue; + EXPECT_EQ(S, Lookup->Data); + } + } + } +} +} // end anonymous namespace From 950ee75909d94c582ecac4d3d559c364ed88244f Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Tue, 29 Oct 2024 17:30:30 +0000 Subject: [PATCH 326/425] [RISC-V] Fix check of minimum vlen. 
(#114055) If we have a minimum vlen, we were adjusting StackSize to change the unit from vscale to bytes, and then calculating the required padding size for alignment in bytes. However, we then used that padding size as an offset in vscale units, resulting in misplaced stack objects. While it would be possible to adjust the object offsets by dividing AlignmentPadding by ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, we can simplify the calculation a bit if instead we adjust the alignment to be in vscale units. @topperc This fixes a bug I am seeing after #110312, but I am not 100% certain I am understanding the code correctly, could you please see if this makes sense to you? --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 24 ++--- .../CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll | 101 ++++++++++++++++++ 2 files changed, 113 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index b49cbab1876d79a..d70903519ecb05d 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1133,23 +1133,23 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const { uint64_t StackSize = Offset; - // Multiply by vscale. - if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock) - StackSize *= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock; - // Ensure the alignment of the RVV stack. Since we want the most-aligned // object right at the bottom (i.e., any padding at the top of the frame), // readjust all RVV objects down by the alignment padding. 
- if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) { - StackSize += AlignmentPadding; - for (int FI : ObjectsToAllocate) - MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding); + // Stack size and offsets are multiples of vscale, stack alignment is in + // bytes, we can divide stack alignment by minimum vscale to get a maximum + // stack alignment multiple of vscale. + auto VScale = + std::max(ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, 1); + if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) { + if (auto AlignmentPadding = + offsetToAlignment(StackSize, Align(RVVStackAlignVScale))) { + StackSize += AlignmentPadding; + for (int FI : ObjectsToAllocate) + MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding); + } } - // Remove vscale. - if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock) - StackSize /= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock; - return std::make_pair(StackSize, RVVStackAlign); } diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll index 35e269b91190257..43be8feece23c1b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -756,3 +756,104 @@ define void @lmul_8_x9() nounwind { %v9 = alloca ret void } + +define void @lmul_16_align() nounwind { +; NOZBA-LABEL: lmul_16_align: +; NOZBA: # %bb.0: +; NOZBA-NEXT: addi sp, sp, -144 +; NOZBA-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; NOZBA-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; NOZBA-NEXT: addi s0, sp, 144 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: li a1, 24 +; NOZBA-NEXT: mul a0, a0, a1 +; NOZBA-NEXT: sub sp, sp, a0 +; NOZBA-NEXT: andi sp, sp, -128 +; NOZBA-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; NOZBA-NEXT: vmv.v.i v8, 0 +; NOZBA-NEXT: csrr a0, vlenb +; NOZBA-NEXT: add a0, sp, a0 +; NOZBA-NEXT: addi a0, a0, 128 +; NOZBA-NEXT: vs8r.v v8, (a0) +; NOZBA-NEXT: csrr a1, vlenb +; NOZBA-NEXT: slli 
a1, a1, 3 +; NOZBA-NEXT: add a0, a0, a1 +; NOZBA-NEXT: vs8r.v v8, (a0) +; NOZBA-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; NOZBA-NEXT: vmv.v.i v8, 0 +; NOZBA-NEXT: addi a0, sp, 128 +; NOZBA-NEXT: vs1r.v v8, (a0) +; NOZBA-NEXT: addi sp, s0, -144 +; NOZBA-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; NOZBA-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; NOZBA-NEXT: addi sp, sp, 144 +; NOZBA-NEXT: ret +; +; ZBA-LABEL: lmul_16_align: +; ZBA: # %bb.0: +; ZBA-NEXT: addi sp, sp, -144 +; ZBA-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; ZBA-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; ZBA-NEXT: addi s0, sp, 144 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: slli a0, a0, 3 +; ZBA-NEXT: sh1add a0, a0, a0 +; ZBA-NEXT: sub sp, sp, a0 +; ZBA-NEXT: andi sp, sp, -128 +; ZBA-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; ZBA-NEXT: vmv.v.i v8, 0 +; ZBA-NEXT: csrr a0, vlenb +; ZBA-NEXT: add a0, sp, a0 +; ZBA-NEXT: addi a0, a0, 128 +; ZBA-NEXT: vs8r.v v8, (a0) +; ZBA-NEXT: csrr a1, vlenb +; ZBA-NEXT: sh3add a0, a1, a0 +; ZBA-NEXT: vs8r.v v8, (a0) +; ZBA-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; ZBA-NEXT: vmv.v.i v8, 0 +; ZBA-NEXT: addi a0, sp, 128 +; ZBA-NEXT: vs1r.v v8, (a0) +; ZBA-NEXT: addi sp, s0, -144 +; ZBA-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; ZBA-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; ZBA-NEXT: addi sp, sp, 144 +; ZBA-NEXT: ret +; +; NOMUL-LABEL: lmul_16_align: +; NOMUL: # %bb.0: +; NOMUL-NEXT: addi sp, sp, -144 +; NOMUL-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; NOMUL-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; NOMUL-NEXT: addi s0, sp, 144 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: slli a0, a0, 3 +; NOMUL-NEXT: mv a1, a0 +; NOMUL-NEXT: slli a0, a0, 1 +; NOMUL-NEXT: add a0, a0, a1 +; NOMUL-NEXT: sub sp, sp, a0 +; NOMUL-NEXT: andi sp, sp, -128 +; NOMUL-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; NOMUL-NEXT: vmv.v.i v8, 0 +; NOMUL-NEXT: csrr a0, vlenb +; NOMUL-NEXT: add a0, sp, a0 +; NOMUL-NEXT: addi a0, a0, 128 +; NOMUL-NEXT: vs8r.v v8, (a0) +; NOMUL-NEXT: csrr 
a1, vlenb +; NOMUL-NEXT: slli a1, a1, 3 +; NOMUL-NEXT: add a0, a0, a1 +; NOMUL-NEXT: vs8r.v v8, (a0) +; NOMUL-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; NOMUL-NEXT: vmv.v.i v8, 0 +; NOMUL-NEXT: addi a0, sp, 128 +; NOMUL-NEXT: vs1r.v v8, (a0) +; NOMUL-NEXT: addi sp, s0, -144 +; NOMUL-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; NOMUL-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; NOMUL-NEXT: addi sp, sp, 144 +; NOMUL-NEXT: ret + %v1 = alloca + %v2 = alloca + store zeroinitializer, ptr %v1 + store zeroinitializer, ptr %v2 + ret void +} From 4abc35740760b626d3fcabd001593d46c4b595af Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 29 Oct 2024 13:36:22 -0400 Subject: [PATCH 327/425] Nominate Sirraide for AST visitors and Sema (#114092) Sirraide has been actively reviewing Sema code for a while now and definitely has the expertise to help maintain that section of the compiler. Further, he has been refactoring AST visitors to try to reduce the compile time overhead associated with them and would be a good resource for keeping an eye on that part of the code base too. 
--- clang/Maintainers.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 08dcc584f6c5748..9d3f6d25f60bb51 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -33,6 +33,12 @@ AST matchers | aaron\@aaronballman.com (email), aaron.ballman (Phabricator), AaronBallman (GitHub), AaronBallman (Discourse), aaronballman (Discord), AaronBallman (IRC) +AST Visitors +~~~~~~~~~~~~ +| Sirraide +| aeternalmail\@gmail.com (email), Sirraide (GitHub), Ætérnal (Discord), Sirraide (Discourse) + + Clang LLVM IR generation ~~~~~~~~~~~~~~~~~~~~~~~~ | John McCall @@ -57,6 +63,12 @@ Analysis & CFG | sgatev\@google.com (email), sgatev (Phabricator), sgatev (GitHub) +Sema +~~~~ +| Sirraide +| aeternalmail\@gmail.com (email), Sirraide (GitHub), Ætérnal (Discord), Sirraide (Discourse) + + Experimental new constant interpreter ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | Timm Bäder From 639a7ac648f1e50ccd2556e17d401c04f9cce625 Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Tue, 29 Oct 2024 11:36:55 -0600 Subject: [PATCH 328/425] [Clang][AST] Store injected template arguments in TemplateParameterList (#113579) Currently, we store injected template arguments in `RedeclarableTemplateDecl::CommonBase`. This approach has a couple problems: 1. We can only access the injected template arguments of `RedeclarableTemplateDecl` derived types, but other `Decl` kinds still make use of the injected arguments (e.g. `ClassTemplatePartialSpecializationDecl`, `VarTemplatePartialSpecializationDecl`, and `TemplateTemplateParmDecl`). 2. Accessing the injected template arguments requires the common data structure to be allocated. This may occur before we determine whether a previous declaration exists (e.g. when comparing constraints), so if the template _is_ a redeclaration, we end up discarding the common data structure. 
This patch moves the storage and access of injected template arguments from `RedeclarableTemplateDecl` to `TemplateParameterList`. --- clang/include/clang/AST/ASTContext.h | 12 +--- clang/include/clang/AST/DeclTemplate.h | 44 +++++++-------- clang/lib/AST/ASTContext.cpp | 16 ++---- clang/lib/AST/DeclTemplate.cpp | 64 +++++----------------- clang/lib/Sema/SemaTemplateDeduction.cpp | 13 ++--- clang/lib/Sema/SemaTemplateInstantiate.cpp | 14 +++-- 6 files changed, 54 insertions(+), 109 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index a4d36f2eacd5d1b..07b4e36f3ef05e5 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -239,7 +239,7 @@ class ASTContext : public RefCountedBase { mutable llvm::ContextualFoldingSet DependentTemplateSpecializationTypes; - llvm::FoldingSet PackExpansionTypes; + mutable llvm::FoldingSet PackExpansionTypes; mutable llvm::FoldingSet ObjCObjectTypes; mutable llvm::FoldingSet ObjCObjectPointerTypes; mutable llvm::FoldingSet @@ -1778,13 +1778,7 @@ class ASTContext : public RefCountedBase { ElaboratedTypeKeyword Keyword, NestedNameSpecifier *NNS, const IdentifierInfo *Name, ArrayRef Args) const; - TemplateArgument getInjectedTemplateArg(NamedDecl *ParamDecl); - - /// Get a template argument list with one argument per template parameter - /// in a template parameter list, such as for the injected class name of - /// a class template. - void getInjectedTemplateArgs(const TemplateParameterList *Params, - SmallVectorImpl &Args); + TemplateArgument getInjectedTemplateArg(NamedDecl *ParamDecl) const; /// Form a pack expansion type with the given pattern. /// \param NumExpansions The number of expansions for the pack, if known. @@ -1795,7 +1789,7 @@ class ASTContext : public RefCountedBase { /// if this is the canonical type of another pack expansion type. 
QualType getPackExpansionType(QualType Pattern, std::optional NumExpansions, - bool ExpectPackInType = true); + bool ExpectPackInType = true) const; QualType getObjCInterfaceType(const ObjCInterfaceDecl *Decl, ObjCInterfaceDecl *PrevDecl = nullptr) const; diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index 0f0c0bf6e4ef4f5..a572e3380f16550 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -71,6 +71,9 @@ NamedDecl *getAsNamedDecl(TemplateParameter P); class TemplateParameterList final : private llvm::TrailingObjects { + /// The template argument list of the template parameter list. + TemplateArgument *InjectedArgs = nullptr; + /// The location of the 'template' keyword. SourceLocation TemplateLoc; @@ -196,6 +199,9 @@ class TemplateParameterList final bool hasAssociatedConstraints() const; + /// Get the template argument list of the template parameter list. + ArrayRef getInjectedTemplateArgs(const ASTContext &Context); + SourceLocation getTemplateLoc() const { return TemplateLoc; } SourceLocation getLAngleLoc() const { return LAngleLoc; } SourceLocation getRAngleLoc() const { return RAngleLoc; } @@ -793,15 +799,6 @@ class RedeclarableTemplateDecl : public TemplateDecl, /// The first value in the array is the number of specializations/partial /// specializations that follow. GlobalDeclID *LazySpecializations = nullptr; - - /// The set of "injected" template arguments used within this - /// template. - /// - /// This pointer refers to the template arguments (there are as - /// many template arguments as template parameters) for the - /// template, and is allocated lazily, since most templates do not - /// require the use of this information. 
- TemplateArgument *InjectedArgs = nullptr; }; /// Pointer to the common data shared by all declarations of this @@ -927,7 +924,10 @@ class RedeclarableTemplateDecl : public TemplateDecl, /// Although the C++ standard has no notion of the "injected" template /// arguments for a template, the notion is convenient when /// we need to perform substitutions inside the definition of a template. - ArrayRef getInjectedTemplateArgs(); + ArrayRef + getInjectedTemplateArgs(const ASTContext &Context) const { + return getTemplateParameters()->getInjectedTemplateArgs(Context); + } using redecl_range = redeclarable_base::redecl_range; using redecl_iterator = redeclarable_base::redecl_iterator; @@ -2087,10 +2087,6 @@ class ClassTemplatePartialSpecializationDecl /// The list of template parameters TemplateParameterList *TemplateParams = nullptr; - /// The set of "injected" template arguments used within this - /// partial specialization. - TemplateArgument *InjectedArgs = nullptr; - /// The class template partial specialization from which this /// class template partial specialization was instantiated. /// @@ -2136,9 +2132,11 @@ class ClassTemplatePartialSpecializationDecl return TemplateParams; } - /// Retrieve the template arguments list of the template parameter list - /// of this template. - ArrayRef getInjectedTemplateArgs(); + /// Get the template argument list of the template parameter list. + ArrayRef + getInjectedTemplateArgs(const ASTContext &Context) const { + return getTemplateParameters()->getInjectedTemplateArgs(Context); + } /// \brief All associated constraints of this partial specialization, /// including the requires clause and any constraints derived from @@ -2864,10 +2862,6 @@ class VarTemplatePartialSpecializationDecl /// The list of template parameters TemplateParameterList *TemplateParams = nullptr; - /// The set of "injected" template arguments used within this - /// partial specialization. 
- TemplateArgument *InjectedArgs = nullptr; - /// The variable template partial specialization from which this /// variable template partial specialization was instantiated. /// @@ -2914,9 +2908,11 @@ class VarTemplatePartialSpecializationDecl return TemplateParams; } - /// Retrieve the template arguments list of the template parameter list - /// of this template. - ArrayRef getInjectedTemplateArgs(); + /// Get the template argument list of the template parameter list. + ArrayRef + getInjectedTemplateArgs(const ASTContext &Context) const { + return getTemplateParameters()->getInjectedTemplateArgs(Context); + } /// \brief All associated constraints of this partial specialization, /// including the requires clause and any constraints derived from diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 69892bda42b2566..1c3f771f417ccf2 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -5634,7 +5634,7 @@ ASTContext::getDependentTemplateSpecializationType( return QualType(T, 0); } -TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) { +TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) const { TemplateArgument Arg; if (const auto *TTP = dyn_cast(Param)) { QualType ArgType = getTypeDeclType(TTP); @@ -5678,23 +5678,15 @@ TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) { } if (Param->isTemplateParameterPack()) - Arg = TemplateArgument::CreatePackCopy(*this, Arg); + Arg = + TemplateArgument::CreatePackCopy(const_cast(*this), Arg); return Arg; } -void -ASTContext::getInjectedTemplateArgs(const TemplateParameterList *Params, - SmallVectorImpl &Args) { - Args.reserve(Args.size() + Params->size()); - - for (NamedDecl *Param : *Params) - Args.push_back(getInjectedTemplateArg(Param)); -} - QualType ASTContext::getPackExpansionType(QualType Pattern, std::optional NumExpansions, - bool ExpectPackInType) { + bool ExpectPackInType) const { assert((!ExpectPackInType || 
Pattern->containsUnexpandedParameterPack()) && "Pack expansions must expand one or more parameter packs"); diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp index 4a506b7be456429..755ec72f00bf771 100644 --- a/clang/lib/AST/DeclTemplate.cpp +++ b/clang/lib/AST/DeclTemplate.cpp @@ -51,7 +51,7 @@ DefaultTemplateArgumentContainsUnexpandedPack(const TemplateParam &P) { P.getDefaultArgument().getArgument().containsUnexpandedParameterPack(); } -TemplateParameterList::TemplateParameterList(const ASTContext& C, +TemplateParameterList::TemplateParameterList(const ASTContext &C, SourceLocation TemplateLoc, SourceLocation LAngleLoc, ArrayRef Params, @@ -244,6 +244,17 @@ bool TemplateParameterList::hasAssociatedConstraints() const { return HasRequiresClause || HasConstrainedParameters; } +ArrayRef +TemplateParameterList::getInjectedTemplateArgs(const ASTContext &Context) { + if (!InjectedArgs) { + InjectedArgs = new (Context) TemplateArgument[size()]; + llvm::transform(*this, InjectedArgs, [&](NamedDecl *ND) { + return Context.getInjectedTemplateArg(ND); + }); + } + return {InjectedArgs, NumParams}; +} + bool TemplateParameterList::shouldIncludeTypeForArgument( const PrintingPolicy &Policy, const TemplateParameterList *TPL, unsigned Idx) { @@ -396,22 +407,6 @@ void RedeclarableTemplateDecl::addSpecializationImpl( SETraits::getDecl(Entry)); } -ArrayRef RedeclarableTemplateDecl::getInjectedTemplateArgs() { - TemplateParameterList *Params = getTemplateParameters(); - auto *CommonPtr = getCommonPtr(); - if (!CommonPtr->InjectedArgs) { - auto &Context = getASTContext(); - SmallVector TemplateArgs; - Context.getInjectedTemplateArgs(Params, TemplateArgs); - CommonPtr->InjectedArgs = - new (Context) TemplateArgument[TemplateArgs.size()]; - std::copy(TemplateArgs.begin(), TemplateArgs.end(), - CommonPtr->InjectedArgs); - } - - return llvm::ArrayRef(CommonPtr->InjectedArgs, Params->size()); -} - 
//===----------------------------------------------------------------------===// // FunctionTemplateDecl Implementation //===----------------------------------------------------------------------===// @@ -631,13 +626,10 @@ ClassTemplateDecl::getInjectedClassNameSpecialization() { // expansion (14.5.3) whose pattern is the name of the template parameter // pack. ASTContext &Context = getASTContext(); - TemplateParameterList *Params = getTemplateParameters(); - SmallVector TemplateArgs; - Context.getInjectedTemplateArgs(Params, TemplateArgs); TemplateName Name = Context.getQualifiedTemplateName( /*NNS=*/nullptr, /*TemplateKeyword=*/false, TemplateName(this)); - CommonPtr->InjectedClassNameType = - Context.getTemplateSpecializationType(Name, TemplateArgs); + CommonPtr->InjectedClassNameType = Context.getTemplateSpecializationType( + Name, getTemplateParameters()->getInjectedTemplateArgs(Context)); return CommonPtr->InjectedClassNameType; } @@ -1184,20 +1176,6 @@ SourceRange ClassTemplatePartialSpecializationDecl::getSourceRange() const { return Range; } -ArrayRef -ClassTemplatePartialSpecializationDecl::getInjectedTemplateArgs() { - TemplateParameterList *Params = getTemplateParameters(); - auto *First = cast(getFirstDecl()); - if (!First->InjectedArgs) { - auto &Context = getASTContext(); - SmallVector TemplateArgs; - Context.getInjectedTemplateArgs(Params, TemplateArgs); - First->InjectedArgs = new (Context) TemplateArgument[TemplateArgs.size()]; - std::copy(TemplateArgs.begin(), TemplateArgs.end(), First->InjectedArgs); - } - return llvm::ArrayRef(First->InjectedArgs, Params->size()); -} - //===----------------------------------------------------------------------===// // FriendTemplateDecl Implementation //===----------------------------------------------------------------------===// @@ -1548,20 +1526,6 @@ SourceRange VarTemplatePartialSpecializationDecl::getSourceRange() const { return Range; } -ArrayRef 
-VarTemplatePartialSpecializationDecl::getInjectedTemplateArgs() { - TemplateParameterList *Params = getTemplateParameters(); - auto *First = cast(getFirstDecl()); - if (!First->InjectedArgs) { - auto &Context = getASTContext(); - SmallVector TemplateArgs; - Context.getInjectedTemplateArgs(Params, TemplateArgs); - First->InjectedArgs = new (Context) TemplateArgument[TemplateArgs.size()]; - std::copy(TemplateArgs.begin(), TemplateArgs.end(), First->InjectedArgs); - } - return llvm::ArrayRef(First->InjectedArgs, Params->size()); -} - static TemplateParameterList * createMakeIntegerSeqParameterList(const ASTContext &C, DeclContext *DC) { // typename T diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index db1d7fa237131a8..b45f30fed49a647 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -6163,7 +6163,7 @@ struct TemplateArgumentListAreEqual { std::enable_if_t, bool> = true> bool operator()(T1 *Spec, T2 *Primary) { ArrayRef Args1 = Spec->getTemplateArgs().asArray(), - Args2 = Primary->getInjectedTemplateArgs(); + Args2 = Primary->getInjectedTemplateArgs(Ctx); for (unsigned I = 0, E = Args1.size(); I < E; ++I) { // We use profile, instead of structural comparison of the arguments, @@ -6342,7 +6342,7 @@ bool Sema::isMoreSpecializedThanPrimary( VarTemplateDecl *Primary = Spec->getSpecializedTemplate(); TemplateName Name(Primary); QualType PrimaryT = Context.getTemplateSpecializationType( - Name, Primary->getInjectedTemplateArgs()); + Name, Primary->getInjectedTemplateArgs(Context)); QualType PartialT = Context.getTemplateSpecializationType( Name, Spec->getTemplateArgs().asArray()); @@ -6372,18 +6372,14 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( // - Each function template has a single function parameter whose type is // a specialization of X with template arguments corresponding to the // template parameters from the respective function template - 
SmallVector AArgs; - Context.getInjectedTemplateArgs(A, AArgs); + SmallVector AArgs(A->getInjectedTemplateArgs(Context)); // Check P's arguments against A's parameter list. This will fill in default // template arguments as needed. AArgs are already correct by construction. // We can't just use CheckTemplateIdType because that will expand alias // templates. - SmallVector PArgs; + SmallVector PArgs(P->getInjectedTemplateArgs(Context)); { - SFINAETrap Trap(*this); - - Context.getInjectedTemplateArgs(P, PArgs); TemplateArgumentListInfo PArgList(P->getLAngleLoc(), P->getRAngleLoc()); for (unsigned I = 0, N = P->size(); I != N; ++I) { @@ -6399,6 +6395,7 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( } PArgs.clear(); + SFINAETrap Trap(*this); // C++1z [temp.arg.template]p3: // If the rewrite produces an invalid type, then P is not at least as // specialized as A. diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 6a55861fe5af3b1..dea97bfce532c9d 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -200,7 +200,7 @@ struct TemplateInstantiationArgumentCollecter if (Innermost) AddInnermostTemplateArguments(FTD); else if (ForConstraintInstantiation) - AddOuterTemplateArguments(FTD, FTD->getInjectedTemplateArgs(), + AddOuterTemplateArguments(FTD, FTD->getInjectedTemplateArgs(S.Context), /*Final=*/false); if (FTD->isMemberSpecialization()) @@ -219,7 +219,7 @@ struct TemplateInstantiationArgumentCollecter if (Innermost) AddInnermostTemplateArguments(VTD); else if (ForConstraintInstantiation) - AddOuterTemplateArguments(VTD, VTD->getInjectedTemplateArgs(), + AddOuterTemplateArguments(VTD, VTD->getInjectedTemplateArgs(S.Context), /*Final=*/false); if (VTD->isMemberSpecialization()) @@ -237,7 +237,8 @@ struct TemplateInstantiationArgumentCollecter if (Innermost) AddInnermostTemplateArguments(VTPSD); else if (ForConstraintInstantiation) - 
AddOuterTemplateArguments(VTPSD, VTPSD->getInjectedTemplateArgs(), + AddOuterTemplateArguments(VTPSD, + VTPSD->getInjectedTemplateArgs(S.Context), /*Final=*/false); if (VTPSD->isMemberSpecialization()) @@ -254,7 +255,7 @@ struct TemplateInstantiationArgumentCollecter if (Innermost) AddInnermostTemplateArguments(CTD); else if (ForConstraintInstantiation) - AddOuterTemplateArguments(CTD, CTD->getInjectedTemplateArgs(), + AddOuterTemplateArguments(CTD, CTD->getInjectedTemplateArgs(S.Context), /*Final=*/false); if (CTD->isMemberSpecialization()) @@ -274,7 +275,8 @@ struct TemplateInstantiationArgumentCollecter if (Innermost) AddInnermostTemplateArguments(CTPSD); else if (ForConstraintInstantiation) - AddOuterTemplateArguments(CTPSD, CTPSD->getInjectedTemplateArgs(), + AddOuterTemplateArguments(CTPSD, + CTPSD->getInjectedTemplateArgs(S.Context), /*Final=*/false); if (CTPSD->isMemberSpecialization()) @@ -290,7 +292,7 @@ struct TemplateInstantiationArgumentCollecter if (Innermost) AddInnermostTemplateArguments(TATD); else if (ForConstraintInstantiation) - AddOuterTemplateArguments(TATD, TATD->getInjectedTemplateArgs(), + AddOuterTemplateArguments(TATD, TATD->getInjectedTemplateArgs(S.Context), /*Final=*/false); return UseNextDecl(TATD); From 449523fa0f957db0fff1c0cd9ec5f59e858ece0b Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 29 Oct 2024 13:38:54 -0400 Subject: [PATCH 329/425] Nominate Vassil Vassilev for Modules and Plugins (#114058) Vassil has significant experience helping users with the plugin interface in Clang, especially around the new efforts to bring plugin support to Windows. He also is knowledgeable about modules support. 
--- clang/Maintainers.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 9d3f6d25f60bb51..35c218d8e0e8fa4 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -83,6 +83,9 @@ Modules & serialization | Michael Spencer | bigcheesegs\@gmail.com (email), Bigcheese (Phabricator), Bigcheese (GitHub) +| Vassil Vassilev +| Vassil.Vassilev\@cern.ch (email), v.g.vassilev (Phabricator), vgvassilev (GitHub) + Templates ~~~~~~~~~ @@ -190,6 +193,12 @@ Attributes | ekeane\@nvidia.com (email), ErichKeane (Phabricator), erichkeane (GitHub) + +Plugins +~~~~~~~ +| Vassil Vassilev +| Vassil.Vassilev\@cern.ch (email), v.g.vassilev (Phabricator), vgvassilev (GitHub) + + Inline assembly ~~~~~~~~~~~~~~~ | Eric Christopher From 528e975ac4081c7d84c5664c7ca9a18a916db4c7 Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Tue, 29 Oct 2024 13:48:43 -0400 Subject: [PATCH 330/425] [AMDGPU][test]added unique and sort options for update_mc_test_check script (#111769) add a unique and a sort option to the update_mc_test_check script. These mc asm/dasm files are usually large in number of lines, and these lines are mostly similar to each other. These options can be useful when maintainer is merging or resolving conflicts by making the file identical. Also fixed a small issue in asm/dasm such that the auto generated header line is 1. asm using ";" instead of "//" as comment marker 2. 
dasm using ";" instead of "#" as comment marker --- .../Inputs/amdgpu_asm.s.expected | 2 +- .../Inputs/amdgpu_asm_err.s.expected | 2 +- .../Inputs/amdgpu_asm_sort.s | 5 + .../Inputs/amdgpu_asm_sort.s.expected | 8 ++ .../Inputs/amdgpu_asm_sort_with_comment.s | 9 ++ .../amdgpu_asm_sort_with_comment.s.expected | 13 +++ .../Inputs/amdgpu_asm_unique.s | 10 ++ .../Inputs/amdgpu_asm_unique.s.expected | 10 ++ .../Inputs/amdgpu_dasm.txt.expected | 2 +- .../Inputs/amdgpu_dasm_unique.txt | 5 + .../Inputs/amdgpu_dasm_unique.txt.expected | 5 + .../Inputs/amdgpu_multirun_dasm.txt.expected | 2 +- .../update_mc_test_checks/amdgpu-sort.test | 7 ++ .../update_mc_test_checks/amdgpu-unique.test | 7 ++ llvm/utils/UpdateTestChecks/common.py | 4 +- llvm/utils/update_mc_test_checks.py | 92 +++++++++++++++++-- 16 files changed, 170 insertions(+), 13 deletions(-) create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt.expected create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-sort.test create mode 100644 llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-unique.test diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm.s.expected 
b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm.s.expected index 7336947a3f57a06..2dc30cd112e4658 100644 --- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm.s.expected +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm.s.expected @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py // RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s v_bfrev_b32 v5, v1 diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected index 0a0ad51d15e056d..ca287fc2d632098 100644 --- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py // RUN: not llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s v_bfrev_b32 v5, v299 diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s new file mode 100644 index 000000000000000..ea03c5a6911fadc --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s @@ -0,0 +1,5 @@ +// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +v_bfrev_b32 v5, v1 + +v_bfrev_b32 v1, v1 diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s.expected new 
file mode 100644 index 000000000000000..57f72ed406fb407 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s.expected @@ -0,0 +1,8 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort +// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +v_bfrev_b32 v1, v1 +// CHECK: v_bfrev_b32_e32 v1, v1 ; encoding: [0x01,0x71,0x02,0x7e] + +v_bfrev_b32 v5, v1 +// CHECK: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s new file mode 100644 index 000000000000000..d60b3bda29eda2d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s @@ -0,0 +1,9 @@ +// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +v_bfrev_b32 v5, v1 //This is comment A + +v_bfrev_b32 v1, v1 +// This is comment B + +// This is comment C +v_bfrev_b32 v2, v1 diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s.expected new file mode 100644 index 000000000000000..6924880032717ea --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s.expected @@ -0,0 +1,13 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort +// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +v_bfrev_b32 v1, v1 +// CHECK: v_bfrev_b32_e32 v1, v1 ; encoding: [0x01,0x71,0x02,0x7e] +// This is comment B + +// This is comment C +v_bfrev_b32 v2, v1 +// CHECK: v_bfrev_b32_e32 v2, v1 ; encoding: [0x01,0x71,0x04,0x7e] + 
+v_bfrev_b32 v5, v1 //This is comment A +// CHECK: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s new file mode 100644 index 000000000000000..63240174cdde55f --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s @@ -0,0 +1,10 @@ +// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +//this is commentA +v_bfrev_b32 v5, v1 + +v_bfrev_b32 v5, v1 + +//this is commentB + +//this is commentB diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s.expected new file mode 100644 index 000000000000000..8203b90040ba4ed --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s.expected @@ -0,0 +1,10 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique +// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s + +//this is commentA +v_bfrev_b32 v5, v1 +// CHECK: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] + +//this is commentB + +//this is commentB diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm.txt.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm.txt.expected index a6f7abcb1774ac0..b3cbaff6d1c7ef8 100644 --- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm.txt.expected +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm.txt.expected @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py # RUN: 
llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK %s 0x00,0x00,0x00,0x7e diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt new file mode 100644 index 000000000000000..3d0d49ddeea4256 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt @@ -0,0 +1,5 @@ +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK %s + +0x00,0x00,0x00,0x7e + +0x00,0x00,0x00,0x7e diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt.expected new file mode 100644 index 000000000000000..32bddb20628dad1 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt.expected @@ -0,0 +1,5 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK %s + +0x00,0x00,0x00,0x7e +# CHECK: v_nop ; encoding: [0x00,0x00,0x00,0x7e] diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_multirun_dasm.txt.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_multirun_dasm.txt.expected index 03a5ec3c559dbfc..7b6b832801625b4 100644 --- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_multirun_dasm.txt.expected +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_multirun_dasm.txt.expected @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py # RUN: llvm-mc -triple=amdgcn 
-mcpu=tonga -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK,CHECKA %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK,CHECKB %s diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-sort.test b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-sort.test new file mode 100644 index 000000000000000..f8972ffabf0999c --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-sort.test @@ -0,0 +1,7 @@ +# REQUIRES: amdgpu-registered-target +## Check that sort is working + +# RUN: cp -f %S/Inputs/amdgpu_asm_sort.s %t.s && %update_mc_test_checks --sort %t.s +# RUN: diff -u %S/Inputs/amdgpu_asm_sort.s.expected %t.s +# RUN: cp -f %S/Inputs/amdgpu_asm_sort_with_comment.s %t.s && %update_mc_test_checks --sort %t.s +# RUN: diff -u %S/Inputs/amdgpu_asm_sort_with_comment.s.expected %t.s diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-unique.test b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-unique.test new file mode 100644 index 000000000000000..8a5d83462cad73d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-unique.test @@ -0,0 +1,7 @@ +# REQUIRES: amdgpu-registered-target +## Check that unique is working + +# RUN: cp -f %S/Inputs/amdgpu_asm_unique.s %t.s && %update_mc_test_checks --unique %t.s +# RUN: diff -u %S/Inputs/amdgpu_asm_unique.s.expected %t.s +# RUN: cp -f %S/Inputs/amdgpu_dasm_unique.txt %t.txt && %update_mc_test_checks --unique %t.txt +# RUN: diff -u %S/Inputs/amdgpu_dasm_unique.txt.expected %t.txt diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index 0fbb73431d2cfb8..cdfa8978566fb4d 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -275,8 +275,10 @@ def __init__( self.run_lines = find_run_lines(test, self.input_lines) self.comment_prefix = comment_prefix if 
self.comment_prefix is None: - if self.path.endswith(".mir"): + if self.path.endswith(".mir") or self.path.endswith(".txt"): self.comment_prefix = "#" + elif self.path.endswith(".s"): + self.comment_prefix = "//" else: self.comment_prefix = ";" self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT diff --git a/llvm/utils/update_mc_test_checks.py b/llvm/utils/update_mc_test_checks.py index f9f8cfdea418d01..55ed6c82d4877e2 100755 --- a/llvm/utils/update_mc_test_checks.py +++ b/llvm/utils/update_mc_test_checks.py @@ -6,6 +6,7 @@ from __future__ import print_function import argparse +import functools import os # Used to advertise this file's name ("autogenerated_note"). from UpdateTestChecks import common @@ -50,6 +51,10 @@ def isTestLine(input_line, mc_mode): return True +def isRunLine(l): + return common.RUN_LINE_RE.match(l) + + def hasErr(err): return err and ERROR_RE.search(err) is not None @@ -118,6 +123,19 @@ def main(): default=None, help="Set a default -march for when neither triple nor arch are found in a RUN line", ) + parser.add_argument( + "--unique", + action="store_true", + default=False, + help="remove duplicated test line if found", + ) + parser.add_argument( + "--sort", + action="store_true", + default=False, + help="sort testline in alphabetic order (keep run-lines on top), this option could be dangerous as it" + "could change the order of lines that are not expected", + ) parser.add_argument("tests", nargs="+") initial_args = common.parse_commandline_args(parser) @@ -130,6 +148,11 @@ def main(): mc_mode = "asm" elif ti.path.endswith(".txt"): mc_mode = "dasm" + + if ti.args.sort: + print("sorting with dasm(.txt) file is not supported!") + return -1 + else: common.warn("Expected .s and .txt, Skipping file : ", ti.path) continue @@ -196,6 +219,10 @@ def main(): # find all test line from input testlines = [l for l in ti.input_lines if isTestLine(l, mc_mode)] + # remove duplicated lines to save running time + testlines = 
list(dict.fromkeys(testlines)) + common.debug("Valid test line found: ", len(testlines)) + run_list_size = len(run_list) testnum = len(testlines) @@ -233,7 +260,7 @@ def main(): raw_prefixes.append(prefixes) output_lines = [] - generated_prefixes = [] + generated_prefixes = {} used_prefixes = set() prefix_set = set([prefix for p in run_list for prefix in p[0]]) common.debug("Rewriting FileCheck prefixes:", str(prefix_set)) @@ -298,23 +325,72 @@ def main(): else: gen_prefix += getStdCheckLine(prefix, o, mc_mode) - generated_prefixes.append(gen_prefix.rstrip("\n")) + generated_prefixes[input_line] = gen_prefix.rstrip("\n") # write output - prefix_id = 0 for input_info in ti.iterlines(output_lines): input_line = input_info.line - if isTestLine(input_line, mc_mode): + if input_line in testlines: output_lines.append(input_line) - output_lines.append(generated_prefixes[prefix_id]) - prefix_id += 1 + output_lines.append(generated_prefixes[input_line]) elif should_add_line_to_output(input_line, prefix_set, mc_mode): output_lines.append(input_line) - elif input_line in ti.run_lines or input_line == "": - output_lines.append(input_line) + if ti.args.unique or ti.args.sort: + # split with double newlines + test_units = "\n".join(output_lines).split("\n\n") + + # select the key line for each test unit + test_dic = {} + for unit in test_units: + lines = unit.split("\n") + for l in lines: + # if contains multiple lines, use + # the first testline or runline as key + if isTestLine(l, mc_mode): + test_dic[unit] = l + break + if isRunLine(l): + test_dic[unit] = l + break + + # unique + if ti.args.unique: + new_test_units = [] + written_lines = set() + for unit in test_units: + # if not testline/runline, we just add it + if unit not in test_dic: + new_test_units.append(unit) + else: + if test_dic[unit] in written_lines: + common.debug("Duplicated test skipped: ", unit) + continue + + written_lines.add(test_dic[unit]) + new_test_units.append(unit) + test_units = new_test_units + + # 
sort + if ti.args.sort: + + def getkey(l): + # find key of test unit, otherwise use first line + if l in test_dic: + line = test_dic[l] + else: + line = l.split("\n")[0] + + # runline placed on the top + return (not isRunLine(line), line) + + test_units = sorted(test_units, key=getkey) + + # join back to be output string + output_lines = "\n\n".join(test_units).split("\n") + # output if ti.args.gen_unused_prefix_body: output_lines.extend( ti.get_checks_for_unused_prefixes(run_list, used_prefixes) From ba65710908137fe68e7c039f1e2829c3d37480f3 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Tue, 29 Oct 2024 10:49:35 -0700 Subject: [PATCH 331/425] [RISCV] Avoid redundant SchedRead on _TIED VPseudos (#113940) _TIED and _MASK_TIED pseudos have one less operand compared to other pseudos, thus we shouldn't attach the same number of SchedRead for these instructions. I don't think we have a way to (explicitly) check scheduling classes. So I only test this patch with existing tests. --- llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 4e8619c5ec2392e..8e0c4826ac00de4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -104,13 +104,28 @@ class SchedCommon writes, list reads, string mx = "WorstCase", int sew = 0, bit forceMasked = 0, bit forcePassthruRead = 0> : Sched<[]> { defvar isMasked = !ne(!find(NAME, "_MASK"), -1); + defvar isTied = !ne(!find(NAME, "_TIED"), -1); defvar isMaskedOrForceMasked = !or(forceMasked, isMasked); + defvar isTiedMasked = !and(isMaskedOrForceMasked, isTied); defvar passthruRead = !if(!or(!eq(mx, "WorstCase"), !eq(sew, 0)), !cast("ReadVPassthru_" # mx), !cast("ReadVPassthru_" # mx # "_E" #sew)); - defvar needsPassthruRead = !or(isMaskedOrForceMasked, forcePassthruRead); + // We don't need passthru operand if it's already _TIED 
without mask. + defvar needsForcePassthruRead = !and(forcePassthruRead, !not(isTied)); + defvar needsPassthruRead = !or(isMaskedOrForceMasked, needsForcePassthruRead); + // If this is a _TIED + masked operation, $rs2 (i.e. the first operand) is + // merged with the mask. + // NOTE: the following if statement is written in such a weird way because + // should we want to write something like + // `!if(!and(!not(!empty(reads), isTiedMasked), !tail(reads), reads)` + // since `!if` doesn't have a proper short-circuit behavior, if the + // condition of this `!if` cannot be resolved right away, `!tail(reads)` will + // be immediately evaluated anyway even when `reads` is empty, which leads to + // an assertion failure. + defvar readsWithTiedMask = + !if(isTiedMasked, !if(!not(!empty(reads)), !tail(reads), reads), reads); defvar readsWithMask = - !if(isMaskedOrForceMasked, !listconcat(reads, [ReadVMask]), reads); + !if(isMaskedOrForceMasked, !listconcat(readsWithTiedMask, [ReadVMask]), reads); defvar allReads = !if(needsPassthruRead, !listconcat([passthruRead], readsWithMask), reads); let SchedRW = !listconcat(writes, allReads); From 6f66530fd17a2333939e6b5a46d378ac7379f7ca Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 29 Oct 2024 10:55:34 -0700 Subject: [PATCH 332/425] [mlir] Fix a warning This patch fixes: mlir/lib/Pass/PassRegistry.cpp:425:37: error: ISO C++ requires the name after '::~' to be found in the same scope as the name before '::~' [-Werror,-Wdtor-name] --- mlir/lib/Pass/PassRegistry.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index 029512fd3ecc118..fe842755958418e 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -422,7 +422,7 @@ llvm::cl::OptionValue::operator=( return *this; } -llvm::cl::OptionValue::~OptionValue() = default; +llvm::cl::OptionValue::~OptionValue() = default; void llvm::cl::OptionValue::setValue( const 
OpPassManager &newValue) { From b0dd368d5741b1ad117848e33148d95406b33241 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 29 Oct 2024 18:01:23 +0000 Subject: [PATCH 333/425] [gn build] Port b510cdb895b9 --- llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index 64b03b57388cb2d..d152aec19d1b587 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -152,6 +152,7 @@ static_library("Support") { "TimeProfiler.cpp", "Timer.cpp", "ToolOutputFile.cpp", + "TrieRawHashMap.cpp", "Twine.cpp", "TypeSize.cpp", "Unicode.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn index c27faaaecf30d16..07ed3b4718af50e 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn @@ -94,6 +94,7 @@ unittest("ADTTests") { "StringSetTest.cpp", "StringSwitchTest.cpp", "TinyPtrVectorTest.cpp", + "TrieRawHashMapTest.cpp", "TwineTest.cpp", "TypeSwitchTest.cpp", "TypeTraitsTest.cpp", From 6563ed3162d16e7f067dda554e96d0c9d476f207 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 29 Oct 2024 14:10:25 -0400 Subject: [PATCH 334/425] [libc++][NFC] Remove trailing whitespace in the modulemap --- libcxx/include/module.modulemap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index c3561590e06d8a7..c3d080007319927 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1229,7 +1229,7 @@ module std [system] { header "flat_map" export * } - + module format { module buffer { header "__format/buffer.h" } module concepts { header "__format/concepts.h" } From 
cdacc9b5c7ec020bad24dbdcbeba96ac1d2713e5 Mon Sep 17 00:00:00 2001 From: Jerry Sun <105613447+jerryyiransun@users.noreply.github.com> Date: Tue, 29 Oct 2024 14:10:54 -0400 Subject: [PATCH 335/425] [TableGen] [NFC] Refine TableGen code to comply with `clang-tidy` checks (#113318) Code cleanups for TableGen files, changes includes function names, variable names and unused imports. --------- Co-authored-by: Matt Arsenault --- llvm/utils/TableGen/ARMTargetDefEmitter.cpp | 20 +- llvm/utils/TableGen/CallingConvEmitter.cpp | 48 ++-- llvm/utils/TableGen/CodeEmitterGen.cpp | 232 +++++++-------- llvm/utils/TableGen/CodeGenMapTable.cpp | 52 ++-- llvm/utils/TableGen/DAGISelEmitter.cpp | 8 +- llvm/utils/TableGen/DFAPacketizerEmitter.cpp | 30 +- llvm/utils/TableGen/DXILEmitter.cpp | 42 +-- llvm/utils/TableGen/DirectiveEmitter.cpp | 234 +++++++-------- llvm/utils/TableGen/DisassemblerEmitter.cpp | 4 +- llvm/utils/TableGen/OptionParserEmitter.cpp | 26 +- llvm/utils/TableGen/OptionRSTEmitter.cpp | 4 +- llvm/utils/TableGen/RISCVTargetDefEmitter.cpp | 4 +- llvm/utils/TableGen/SubtargetEmitter.cpp | 266 +++++++++--------- llvm/utils/TableGen/TableGen.cpp | 12 +- llvm/utils/TableGen/VTEmitter.cpp | 4 +- 15 files changed, 493 insertions(+), 493 deletions(-) diff --git a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp index 6b8ebf96cdf383e..792d047139466cf 100644 --- a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp @@ -25,19 +25,19 @@ using namespace llvm; /// Collect the full set of implied features for a SubtargetFeature. 
-static void CollectImpliedFeatures(std::set &SeenFeats, +static void collectImpliedFeatures(std::set &SeenFeats, const Record *Rec) { assert(Rec->isSubClassOf("SubtargetFeature") && "Rec is not a SubtargetFeature"); SeenFeats.insert(Rec); for (const Record *Implied : Rec->getValueAsListOfDefs("Implies")) - CollectImpliedFeatures(SeenFeats, Implied); + collectImpliedFeatures(SeenFeats, Implied); } -static void CheckFeatureTree(const Record *Root) { +static void checkFeatureTree(const Record *Root) { std::set SeenFeats; - CollectImpliedFeatures(SeenFeats, Root); + collectImpliedFeatures(SeenFeats, Root); // Check that each of the mandatory (implied) features which is an // ExtensionWithMArch is also enabled by default. @@ -53,12 +53,12 @@ static void CheckFeatureTree(const Record *Root) { } } -static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) { +static void emitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) { OS << "// Autogenerated by ARMTargetDefEmitter.cpp\n\n"; // Look through all SubtargetFeature defs with the given FieldName, and // collect the set of all Values that that FieldName is set to. 
- auto gatherSubtargetFeatureFieldValues = [&RK](StringRef FieldName) { + auto GatherSubtargetFeatureFieldValues = [&RK](StringRef FieldName) { llvm::StringSet<> Set; for (const Record *Rec : RK.getAllDerivedDefinitions("SubtargetFeature")) { if (Rec->getValueAsString("FieldName") == FieldName) { @@ -88,7 +88,7 @@ static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) { << "#define ARM_PROCESSOR_FAMILY(ENUM)\n" << "#endif\n\n"; const StringSet<> ARMProcFamilyVals = - gatherSubtargetFeatureFieldValues("ARMProcFamily"); + GatherSubtargetFeatureFieldValues("ARMProcFamily"); for (const StringRef &Family : ARMProcFamilyVals.keys()) OS << "ARM_PROCESSOR_FAMILY(" << Family << ")\n"; OS << "\n#undef ARM_PROCESSOR_FAMILY\n\n"; @@ -97,7 +97,7 @@ static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) { << "#define ARM_ARCHITECTURE(ENUM)\n" << "#endif\n\n"; // This should correspond to instances of the Architecture tablegen class. - const StringSet<> ARMArchVals = gatherSubtargetFeatureFieldValues("ARMArch"); + const StringSet<> ARMArchVals = GatherSubtargetFeatureFieldValues("ARMArch"); for (const StringRef &Arch : ARMArchVals.keys()) OS << "ARM_ARCHITECTURE(" << Arch << ")\n"; OS << "\n#undef ARM_ARCHITECTURE\n\n"; @@ -315,7 +315,7 @@ static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) { auto Profile = Arch->getValueAsString("Profile"); auto ArchInfo = ArchInfoName(Major, Minor, Profile); - CheckFeatureTree(Arch); + checkFeatureTree(Arch); OS << " {\n" << " \"" << Name << "\",\n" @@ -343,5 +343,5 @@ static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) { } static TableGen::Emitter::Opt - X("gen-arm-target-def", EmitARMTargetDef, + X("gen-arm-target-def", emitARMTargetDef, "Generate the ARM or AArch64 Architecture information header."); diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index c8f263e15d96b76..de20303a5bfd208 100644 --- 
a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -35,12 +35,12 @@ class CallingConvEmitter { public: explicit CallingConvEmitter(const RecordKeeper &R) : Records(R) {} - void run(raw_ostream &o); + void run(raw_ostream &O); private: - void EmitCallingConv(const Record *CC, raw_ostream &O); - void EmitAction(const Record *Action, indent Indent, raw_ostream &O); - void EmitArgRegisterLists(raw_ostream &O); + void emitCallingConv(const Record *CC, raw_ostream &O); + void emitAction(const Record *Action, indent Indent, raw_ostream &O); + void emitArgRegisterLists(raw_ostream &O); }; } // End anonymous namespace @@ -75,16 +75,16 @@ void CallingConvEmitter::run(raw_ostream &O) { Records.getTimer().startTimer("Emit full descriptions"); for (const Record *CC : CCs) { if (!CC->getValueAsBit("Custom")) { - EmitCallingConv(CC, O); + emitCallingConv(CC, O); } } - EmitArgRegisterLists(O); + emitArgRegisterLists(O); O << "\n#endif // CC_REGISTER_LIST\n"; } -void CallingConvEmitter::EmitCallingConv(const Record *CC, raw_ostream &O) { +void CallingConvEmitter::emitCallingConv(const Record *CC, raw_ostream &O) { const ListInit *CCActions = CC->getValueAsListInit("Actions"); Counter = 0; @@ -107,8 +107,8 @@ void CallingConvEmitter::EmitCallingConv(const Record *CC, raw_ostream &O) { << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n" << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n"; // Emit all of the actions, in order. 
- for (unsigned i = 0, e = CCActions->size(); i != e; ++i) { - const Record *Action = CCActions->getElementAsRecord(i); + for (unsigned I = 0, E = CCActions->size(); I != E; ++I) { + const Record *Action = CCActions->getElementAsRecord(I); SwiftAction = llvm::any_of(Action->getSuperClasses(), [](const std::pair &Class) { @@ -117,23 +117,23 @@ void CallingConvEmitter::EmitCallingConv(const Record *CC, raw_ostream &O) { }); O << "\n"; - EmitAction(Action, indent(2), O); + emitAction(Action, indent(2), O); } O << "\n return true; // CC didn't match.\n"; O << "}\n"; } -void CallingConvEmitter::EmitAction(const Record *Action, indent Indent, +void CallingConvEmitter::emitAction(const Record *Action, indent Indent, raw_ostream &O) { if (Action->isSubClassOf("CCPredicateAction")) { O << Indent << "if ("; if (Action->isSubClassOf("CCIfType")) { const ListInit *VTs = Action->getValueAsListInit("VTs"); - for (unsigned i = 0, e = VTs->size(); i != e; ++i) { - const Record *VT = VTs->getElementAsRecord(i); - if (i != 0) + for (unsigned I = 0, E = VTs->size(); I != E; ++I) { + const Record *VT = VTs->getElementAsRecord(I); + if (I != 0) O << " ||\n " << Indent; O << "LocVT == " << getEnumName(getValueType(VT)); } @@ -146,7 +146,7 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent, } O << ") {\n"; - EmitAction(Action->getValueAsDef("SubAction"), Indent + 2, O); + emitAction(Action->getValueAsDef("SubAction"), Indent + 2, O); O << Indent << "}\n"; } else { if (Action->isSubClassOf("CCDelegateTo")) { @@ -171,8 +171,8 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent, << "[] = {\n"; O << Indent << " "; ListSeparator LS; - for (unsigned i = 0, e = RegList->size(); i != e; ++i) { - std::string Name = getQualifiedName(RegList->getElementAsRecord(i)); + for (unsigned I = 0, E = RegList->size(); I != E; ++I) { + std::string Name = getQualifiedName(RegList->getElementAsRecord(I)); if (SwiftAction) 
AssignedSwiftRegsMap[CurrentAction].insert(Name); else @@ -230,16 +230,16 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent, << "[] = {\n"; O << Indent << " "; ListSeparator LS; - for (unsigned i = 0, e = RegList->size(); i != e; ++i) - O << LS << getQualifiedName(RegList->getElementAsRecord(i)); + for (unsigned I = 0, E = RegList->size(); I != E; ++I) + O << LS << getQualifiedName(RegList->getElementAsRecord(I)); O << "\n" << Indent << "};\n"; O << Indent << "static const MCPhysReg RegList" << ShadowRegListNumber << "[] = {\n"; O << Indent << " "; ListSeparator LSS; - for (unsigned i = 0, e = ShadowRegList->size(); i != e; ++i) - O << LSS << getQualifiedName(ShadowRegList->getElementAsRecord(i)); + for (unsigned I = 0, E = ShadowRegList->size(); I != E; ++I) + O << LSS << getQualifiedName(ShadowRegList->getElementAsRecord(I)); O << "\n" << Indent << "};\n"; O << Indent << "if (MCRegister Reg = State.AllocateReg(RegList" @@ -287,8 +287,8 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent, << ShadowRegListNumber << "[] = {\n"; O << Indent << " "; ListSeparator LS; - for (unsigned i = 0, e = ShadowRegList->size(); i != e; ++i) - O << LS << getQualifiedName(ShadowRegList->getElementAsRecord(i)); + for (unsigned I = 0, E = ShadowRegList->size(); I != E; ++I) + O << LS << getQualifiedName(ShadowRegList->getElementAsRecord(I)); O << "\n" << Indent << "};\n"; O << Indent << "int64_t Offset" << ++Counter << " = State.AllocateStack(" @@ -357,7 +357,7 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent, } } -void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) { +void CallingConvEmitter::emitArgRegisterLists(raw_ostream &O) { // Transitively merge all delegated CCs into AssignedRegsMap. 
using EntryTy = std::pair>; bool Redo; diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp index be822c4815289cd..407ee81b7e0b6c3 100644 --- a/llvm/utils/TableGen/CodeEmitterGen.cpp +++ b/llvm/utils/TableGen/CodeEmitterGen.cpp @@ -52,10 +52,10 @@ class CodeEmitterGen { public: CodeEmitterGen(const RecordKeeper &R) : Records(R) {} - void run(raw_ostream &o); + void run(raw_ostream &O); private: - int getVariableBit(const std::string &VarName, const BitsInit *BI, int bit); + int getVariableBit(const std::string &VarName, const BitsInit *BI, int Bit); std::pair getInstructionCases(const Record *R, const CodeGenTarget &Target); void addInstructionCasesForEncoding(const Record *R, @@ -69,10 +69,10 @@ class CodeEmitterGen { const CodeGenTarget &Target); void emitInstructionBaseValues( - raw_ostream &o, ArrayRef NumberedInstructions, + raw_ostream &O, ArrayRef NumberedInstructions, const CodeGenTarget &Target, unsigned HwMode = DefaultMode); void - emitCaseMap(raw_ostream &o, + emitCaseMap(raw_ostream &O, const std::map> &CaseMap); unsigned BitWidth = 0u; bool UseAPInt = false; @@ -81,12 +81,12 @@ class CodeEmitterGen { // If the VarBitInit at position 'bit' matches the specified variable then // return the variable bit position. Otherwise return -1. 
int CodeEmitterGen::getVariableBit(const std::string &VarName, - const BitsInit *BI, int bit) { - if (const VarBitInit *VBI = dyn_cast(BI->getBit(bit))) { + const BitsInit *BI, int Bit) { + if (const VarBitInit *VBI = dyn_cast(BI->getBit(Bit))) { if (const VarInit *VI = dyn_cast(VBI->getBitVar())) if (VI->getName() == VarName) return VBI->getBitNum(); - } else if (const VarInit *VI = dyn_cast(BI->getBit(bit))) { + } else if (const VarInit *VI = dyn_cast(BI->getBit(Bit))) { if (VI->getName() == VarName) return 0; } @@ -104,19 +104,19 @@ bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R, CodeGenInstruction &CGI = Target.getInstruction(R); // Determine if VarName actually contributes to the Inst encoding. - int bit = BI->getNumBits() - 1; + int Bit = BI->getNumBits() - 1; // Scan for a bit that this contributed to. - for (; bit >= 0;) { - if (getVariableBit(VarName, BI, bit) != -1) + for (; Bit >= 0;) { + if (getVariableBit(VarName, BI, Bit) != -1) break; - --bit; + --Bit; } // If we found no bits, ignore this value, otherwise emit the call to get the // operand encoding. - if (bit < 0) + if (Bit < 0) return true; // If the operand matches by name, reference according to that @@ -175,97 +175,97 @@ bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R, // Precalculate the number of lits this variable contributes to in the // operand. If there is a single lit (consecutive range of bits) we can use a // destructive sequence on APInt that reduces memory allocations. - int numOperandLits = 0; - for (int tmpBit = bit; tmpBit >= 0;) { - int varBit = getVariableBit(VarName, BI, tmpBit); + int NumOperandLits = 0; + for (int TmpBit = Bit; TmpBit >= 0;) { + int VarBit = getVariableBit(VarName, BI, TmpBit); // If this bit isn't from a variable, skip it. - if (varBit == -1) { - --tmpBit; + if (VarBit == -1) { + --TmpBit; continue; } // Figure out the consecutive range of bits covered by this operand, in // order to generate better encoding code. 
- int beginVarBit = varBit; + int BeginVarBit = VarBit; int N = 1; - for (--tmpBit; tmpBit >= 0;) { - varBit = getVariableBit(VarName, BI, tmpBit); - if (varBit == -1 || varBit != (beginVarBit - N)) + for (--TmpBit; TmpBit >= 0;) { + VarBit = getVariableBit(VarName, BI, TmpBit); + if (VarBit == -1 || VarBit != (BeginVarBit - N)) break; ++N; - --tmpBit; + --TmpBit; } - ++numOperandLits; + ++NumOperandLits; } unsigned BitOffset = -1; - for (; bit >= 0;) { - int varBit = getVariableBit(VarName, BI, bit); + for (; Bit >= 0;) { + int VarBit = getVariableBit(VarName, BI, Bit); // If this bit isn't from a variable, skip it. - if (varBit == -1) { - --bit; + if (VarBit == -1) { + --Bit; continue; } // Figure out the consecutive range of bits covered by this operand, in // order to generate better encoding code. - int beginInstBit = bit; - int beginVarBit = varBit; + int BeginInstBit = Bit; + int BeginVarBit = VarBit; int N = 1; - for (--bit; bit >= 0;) { - varBit = getVariableBit(VarName, BI, bit); - if (varBit == -1 || varBit != (beginVarBit - N)) + for (--Bit; Bit >= 0;) { + VarBit = getVariableBit(VarName, BI, Bit); + if (VarBit == -1 || VarBit != (BeginVarBit - N)) break; ++N; - --bit; + --Bit; } - std::string maskStr; - int opShift; + std::string MaskStr; + int OpShift; - unsigned loBit = beginVarBit - N + 1; - unsigned hiBit = loBit + N; - unsigned loInstBit = beginInstBit - N + 1; - BitOffset = loInstBit; + unsigned LoBit = BeginVarBit - N + 1; + unsigned HiBit = LoBit + N; + unsigned LoInstBit = BeginInstBit - N + 1; + BitOffset = LoInstBit; if (UseAPInt) { - std::string extractStr; + std::string ExtractStr; if (N >= 64) { - extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " + - itostr(loBit) + ")"; - Case += " Value.insertBits(" + extractStr + ", " + - itostr(loInstBit) + ");\n"; + ExtractStr = "op.extractBits(" + itostr(HiBit - LoBit) + ", " + + itostr(LoBit) + ")"; + Case += " Value.insertBits(" + ExtractStr + ", " + + itostr(LoInstBit) + ");\n"; } 
else { - extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) + - ", " + itostr(loBit) + ")"; - Case += " Value.insertBits(" + extractStr + ", " + - itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n"; + ExtractStr = "op.extractBitsAsZExtValue(" + itostr(HiBit - LoBit) + + ", " + itostr(LoBit) + ")"; + Case += " Value.insertBits(" + ExtractStr + ", " + + itostr(LoInstBit) + ", " + itostr(HiBit - LoBit) + ");\n"; } } else { - uint64_t opMask = ~(uint64_t)0 >> (64 - N); - opShift = beginVarBit - N + 1; - opMask <<= opShift; - maskStr = "UINT64_C(" + utostr(opMask) + ")"; - opShift = beginInstBit - beginVarBit; - - if (numOperandLits == 1) { - Case += " op &= " + maskStr + ";\n"; - if (opShift > 0) { - Case += " op <<= " + itostr(opShift) + ";\n"; - } else if (opShift < 0) { - Case += " op >>= " + itostr(-opShift) + ";\n"; + uint64_t OpMask = ~(uint64_t)0 >> (64 - N); + OpShift = BeginVarBit - N + 1; + OpMask <<= OpShift; + MaskStr = "UINT64_C(" + utostr(OpMask) + ")"; + OpShift = BeginInstBit - BeginVarBit; + + if (NumOperandLits == 1) { + Case += " op &= " + MaskStr + ";\n"; + if (OpShift > 0) { + Case += " op <<= " + itostr(OpShift) + ";\n"; + } else if (OpShift < 0) { + Case += " op >>= " + itostr(-OpShift) + ";\n"; } Case += " Value |= op;\n"; } else { - if (opShift > 0) { - Case += " Value |= (op & " + maskStr + ") << " + - itostr(opShift) + ";\n"; - } else if (opShift < 0) { - Case += " Value |= (op & " + maskStr + ") >> " + - itostr(-opShift) + ";\n"; + if (OpShift > 0) { + Case += " Value |= (op & " + MaskStr + ") << " + + itostr(OpShift) + ";\n"; + } else if (OpShift < 0) { + Case += " Value |= (op & " + MaskStr + ") >> " + + itostr(-OpShift) + ";\n"; } else { - Case += " Value |= (op & " + maskStr + ");\n"; + Case += " Value |= (op & " + MaskStr + ");\n"; } } } @@ -285,7 +285,7 @@ CodeEmitterGen::getInstructionCases(const Record *R, const CodeGenTarget &Target) { std::string Case, BitOffsetCase; - auto append = [&](const std::string &S) { 
+ auto Append = [&](const std::string &S) { Case += S; BitOffsetCase += S; }; @@ -298,7 +298,7 @@ CodeEmitterGen::getInstructionCases(const Record *R, // Invoke the interface to obtain the HwMode ID controlling the // EncodingInfo for the current subtarget. This interface will // mask off irrelevant HwMode IDs. - append(" unsigned HwMode = " + Append(" unsigned HwMode = " "STI.getHwMode(MCSubtargetInfo::HwMode_EncodingInfo);\n"); Case += " switch (HwMode) {\n"; Case += " default: llvm_unreachable(\"Unknown hardware mode!\"); " @@ -328,16 +328,16 @@ CodeEmitterGen::getInstructionCases(const Record *R, Case += " Value = InstBitsByHw[opcode];\n"; } - append(" switch (HwMode) {\n"); - append(" default: llvm_unreachable(\"Unhandled HwMode\");\n"); + Append(" switch (HwMode) {\n"); + Append(" default: llvm_unreachable(\"Unhandled HwMode\");\n"); for (auto &[ModeId, Encoding] : EBM) { - append(" case " + itostr(ModeId) + ": {\n"); + Append(" case " + itostr(ModeId) + ": {\n"); addInstructionCasesForEncoding(R, Encoding, Target, Case, BitOffsetCase); - append(" break;\n"); - append(" }\n"); + Append(" break;\n"); + Append(" }\n"); } - append(" }\n"); + Append(" }\n"); return std::pair(std::move(Case), std::move(BitOffsetCase)); } } @@ -397,13 +397,13 @@ static void emitInstBits(raw_ostream &OS, const APInt &Bits) { } void CodeEmitterGen::emitInstructionBaseValues( - raw_ostream &o, ArrayRef NumberedInstructions, + raw_ostream &O, ArrayRef NumberedInstructions, const CodeGenTarget &Target, unsigned HwMode) { const CodeGenHwModes &HWM = Target.getHwModes(); if (HwMode == DefaultMode) - o << " static const uint64_t InstBits[] = {\n"; + O << " static const uint64_t InstBits[] = {\n"; else - o << " static const uint64_t InstBits_" + O << " static const uint64_t InstBits_" << HWM.getModeName(HwMode, /*IncludeDefault=*/true) << "[] = {\n"; for (const CodeGenInstruction *CGI : NumberedInstructions) { @@ -411,9 +411,9 @@ void CodeEmitterGen::emitInstructionBaseValues( if 
(R->getValueAsString("Namespace") == "TargetOpcode" || R->getValueAsBit("isPseudo")) { - o << " "; - emitInstBits(o, APInt(BitWidth, 0)); - o << ",\n"; + O << " "; + emitInstBits(O, APInt(BitWidth, 0)); + O << ",\n"; continue; } @@ -427,9 +427,9 @@ void CodeEmitterGen::emitInstructionBaseValues( // If the HwMode does not match, then Encoding '0' // should be generated. APInt Value(BitWidth, 0); - o << " "; - emitInstBits(o, Value); - o << "," << '\t' << "// " << R->getName() << "\n"; + O << " "; + emitInstBits(O, Value); + O << "," << '\t' << "// " << R->getName() << "\n"; continue; } } @@ -438,37 +438,37 @@ void CodeEmitterGen::emitInstructionBaseValues( // Start by filling in fixed values. APInt Value(BitWidth, 0); - for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) { - if (const auto *B = dyn_cast(BI->getBit(i)); B && B->getValue()) - Value.setBit(i); + for (unsigned I = 0, E = BI->getNumBits(); I != E; ++I) { + if (const auto *B = dyn_cast(BI->getBit(I)); B && B->getValue()) + Value.setBit(I); } - o << " "; - emitInstBits(o, Value); - o << "," << '\t' << "// " << R->getName() << "\n"; + O << " "; + emitInstBits(O, Value); + O << "," << '\t' << "// " << R->getName() << "\n"; } - o << " UINT64_C(0)\n };\n"; + O << " UINT64_C(0)\n };\n"; } void CodeEmitterGen::emitCaseMap( - raw_ostream &o, + raw_ostream &O, const std::map> &CaseMap) { for (const auto &[Case, InstList] : CaseMap) { bool First = true; for (const auto &Inst : InstList) { if (!First) - o << "\n"; - o << " case " << Inst << ":"; + O << "\n"; + O << " case " << Inst << ":"; First = false; } - o << " {\n"; - o << Case; - o << " break;\n" + O << " {\n"; + O << Case; + O << " break;\n" << " }\n"; } } -void CodeEmitterGen::run(raw_ostream &o) { - emitSourceFileHeader("Machine Code Emitter", o); +void CodeEmitterGen::run(raw_ostream &O) { + emitSourceFileHeader("Machine Code Emitter", O); CodeGenTarget Target(Records); @@ -479,7 +479,7 @@ void CodeEmitterGen::run(raw_ostream &o) { 
Target.getInstructionsByEnumValue(); if (Target.hasVariableLengthEncodings()) { - emitVarLenCodeEmitter(Records, o); + emitVarLenCodeEmitter(Records, O); } else { const CodeGenHwModes &HWM = Target.getHwModes(); // The set of HwModes used by instruction encodings. @@ -509,31 +509,31 @@ void CodeEmitterGen::run(raw_ostream &o) { // Emit function declaration if (UseAPInt) { - o << "void " << Target.getName() + O << "void " << Target.getName() << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" << " SmallVectorImpl &Fixups,\n" << " APInt &Inst,\n" << " APInt &Scratch,\n" << " const MCSubtargetInfo &STI) const {\n"; } else { - o << "uint64_t " << Target.getName(); - o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" + O << "uint64_t " << Target.getName(); + O << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n" << " SmallVectorImpl &Fixups,\n" << " const MCSubtargetInfo &STI) const {\n"; } // Emit instruction base values - emitInstructionBaseValues(o, NumberedInstructions, Target, DefaultMode); + emitInstructionBaseValues(O, NumberedInstructions, Target, DefaultMode); if (!HwModes.empty()) { // Emit table for instrs whose encodings are controlled by HwModes. for (unsigned HwMode : HwModes) { if (HwMode == DefaultMode) continue; - emitInstructionBaseValues(o, NumberedInstructions, Target, HwMode); + emitInstructionBaseValues(O, NumberedInstructions, Target, HwMode); } // This pointer will be assigned to the HwMode table later. - o << " const uint64_t *InstBitsByHw;\n"; + O << " const uint64_t *InstBitsByHw;\n"; } // Map to accumulate all the cases. 
@@ -557,7 +557,7 @@ void CodeEmitterGen::run(raw_ostream &o) { // Emit initial function code if (UseAPInt) { int NumWords = APInt::getNumWords(BitWidth); - o << " const unsigned opcode = MI.getOpcode();\n" + O << " const unsigned opcode = MI.getOpcode();\n" << " if (Scratch.getBitWidth() != " << BitWidth << ")\n" << " Scratch = Scratch.zext(" << BitWidth << ");\n" << " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * " @@ -566,7 +566,7 @@ void CodeEmitterGen::run(raw_ostream &o) { << " APInt &op = Scratch;\n" << " switch (opcode) {\n"; } else { - o << " const unsigned opcode = MI.getOpcode();\n" + O << " const unsigned opcode = MI.getOpcode();\n" << " uint64_t Value = InstBits[opcode];\n" << " uint64_t op = 0;\n" << " (void)op; // suppress warning\n" @@ -574,30 +574,30 @@ void CodeEmitterGen::run(raw_ostream &o) { } // Emit each case statement - emitCaseMap(o, CaseMap); + emitCaseMap(O, CaseMap); // Default case: unhandled opcode - o << " default:\n" + O << " default:\n" << " std::string msg;\n" << " raw_string_ostream Msg(msg);\n" << " Msg << \"Not supported instr: \" << MI;\n" << " report_fatal_error(Msg.str().c_str());\n" << " }\n"; if (UseAPInt) - o << " Inst = Value;\n"; + O << " Inst = Value;\n"; else - o << " return Value;\n"; - o << "}\n\n"; + O << " return Value;\n"; + O << "}\n\n"; - o << "#ifdef GET_OPERAND_BIT_OFFSET\n" + O << "#ifdef GET_OPERAND_BIT_OFFSET\n" << "#undef GET_OPERAND_BIT_OFFSET\n\n" << "uint32_t " << Target.getName() << "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n" << " unsigned OpNum,\n" << " const MCSubtargetInfo &STI) const {\n" << " switch (MI.getOpcode()) {\n"; - emitCaseMap(o, BitOffsetCaseMap); - o << " }\n" + emitCaseMap(O, BitOffsetCaseMap); + O << " }\n" << " std::string msg;\n" << " raw_string_ostream Msg(msg);\n" << " Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum " diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp index 
7876db6f33dfdff..8d22c0013dda881 100644 --- a/llvm/utils/TableGen/CodeGenMapTable.cpp +++ b/llvm/utils/TableGen/CodeGenMapTable.cpp @@ -258,12 +258,12 @@ bool MapTableEmitter::isKeyColInstr(const Record *CurInstr) { // Check if the instruction is a KeyCol instruction. bool MatchFound = true; - for (unsigned j = 0, endCF = ColFields->size(); (j < endCF) && MatchFound; - j++) { + for (unsigned J = 0, EndCf = ColFields->size(); (J < EndCf) && MatchFound; + J++) { const RecordVal *ColFieldName = - CurInstr->getValue(ColFields->getElement(j)); + CurInstr->getValue(ColFields->getElement(J)); std::string CurInstrVal = ColFieldName->getValue()->getAsUnquotedString(); - std::string KeyColValue = KeyCol->getElement(j)->getAsUnquotedString(); + std::string KeyColValue = KeyCol->getElement(J)->getAsUnquotedString(); MatchFound = CurInstrVal == KeyColValue; } return MatchFound; @@ -318,12 +318,12 @@ const Record *MapTableEmitter::getInstrForColumn(const Record *KeyInstr, for (const Record *CurInstr : RelatedInstrVec) { bool MatchFound = true; - for (unsigned j = 0, endCF = ColFields->size(); (j < endCF) && MatchFound; - j++) { - const Init *ColFieldJ = ColFields->getElement(j); + for (unsigned J = 0, EndCf = ColFields->size(); (J < EndCf) && MatchFound; + J++) { + const Init *ColFieldJ = ColFields->getElement(J); const Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue(); std::string CurInstrVal = CurInstrInit->getAsUnquotedString(); - const Init *ColFieldJVallue = CurValueCol->getElement(j); + const Init *ColFieldJVallue = CurValueCol->getElement(J); MatchFound = CurInstrVal == ColFieldJVallue->getAsUnquotedString(); } @@ -368,19 +368,19 @@ unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) { // Number of columns in the table are NumCol+1 because key instructions are // emitted as first column. 
OS << "Table[][" << NumCol + 1 << "] = {\n"; - for (unsigned i = 0; i < TotalNumInstr; i++) { - const Record *CurInstr = NumberedInstructions[i]->TheDef; + for (unsigned I = 0; I < TotalNumInstr; I++) { + const Record *CurInstr = NumberedInstructions[I]->TheDef; ArrayRef ColInstrs = MapTable[CurInstr]; std::string OutStr; unsigned RelExists = 0; if (!ColInstrs.empty()) { - for (unsigned j = 0; j < NumCol; j++) { - if (ColInstrs[j] != nullptr) { + for (unsigned J = 0; J < NumCol; J++) { + if (ColInstrs[J] != nullptr) { RelExists = 1; OutStr += ", "; OutStr += Namespace; OutStr += "::"; - OutStr += ColInstrs[j]->getName(); + OutStr += ColInstrs[J]->getName(); } else { OutStr += ", (uint16_t)-1U"; } @@ -441,20 +441,20 @@ void MapTableEmitter::emitMapFuncBody(raw_ostream &OS, unsigned TableSize) { emitBinSearch(OS, TableSize); if (ValueCols.size() > 1) { - for (unsigned i = 0, e = ValueCols.size(); i < e; i++) { - const ListInit *ColumnI = ValueCols[i]; + for (unsigned I = 0, E = ValueCols.size(); I < E; I++) { + const ListInit *ColumnI = ValueCols[I]; OS << " if ("; - for (unsigned j = 0, ColSize = ColumnI->size(); j < ColSize; ++j) { - std::string ColName = ColFields->getElement(j)->getAsUnquotedString(); + for (unsigned J = 0, ColSize = ColumnI->size(); J < ColSize; ++J) { + std::string ColName = ColFields->getElement(J)->getAsUnquotedString(); OS << "in" << ColName; OS << " == "; - OS << ColName << "_" << ColumnI->getElement(j)->getAsUnquotedString(); - if (j < ColumnI->size() - 1) + OS << ColName << "_" << ColumnI->getElement(J)->getAsUnquotedString(); + if (J < ColumnI->size() - 1) OS << " && "; } OS << ")\n"; OS << " return " << InstrMapDesc.getName(); - OS << "Table[mid][" << i + 1 << "];\n"; + OS << "Table[mid][" << I + 1 << "];\n"; } OS << " return -1;"; } else @@ -509,8 +509,8 @@ static void emitEnums(raw_ostream &OS, const RecordKeeper &Records) { std::vector ValueCols; unsigned ListSize = List->size(); - for (unsigned j = 0; j < ListSize; j++) { - const 
auto *ListJ = cast(List->getElement(j)); + for (unsigned J = 0; J < ListSize; J++) { + const auto *ListJ = cast(List->getElement(J)); if (ListJ->size() != ColFields->size()) PrintFatalError("Record `" + CurMap->getName() + @@ -520,10 +520,10 @@ static void emitEnums(raw_ostream &OS, const RecordKeeper &Records) { ValueCols.push_back(ListJ); } - for (unsigned j = 0, endCF = ColFields->size(); j < endCF; j++) { - for (unsigned k = 0; k < ListSize; k++) { - std::string ColName = ColFields->getElement(j)->getAsUnquotedString(); - ColFieldValueMap[ColName].push_back((ValueCols[k])->getElement(j)); + for (unsigned J = 0, EndCf = ColFields->size(); J < EndCf; J++) { + for (unsigned K = 0; K < ListSize; K++) { + std::string ColName = ColFields->getElement(J)->getAsUnquotedString(); + ColFieldValueMap[ColName].push_back((ValueCols[K])->getElement(J)); } } } diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp index d3b653b0fba27fa..3d39ee148373fd0 100644 --- a/llvm/utils/TableGen/DAGISelEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelEmitter.cpp @@ -55,8 +55,8 @@ static unsigned getResultPatternCost(TreePatternNode &P, if (II.usesCustomInserter) Cost += 10; } - for (unsigned i = 0, e = P.getNumChildren(); i != e; ++i) - Cost += getResultPatternCost(P.getChild(i), CGP); + for (unsigned I = 0, E = P.getNumChildren(); I != E; ++I) + Cost += getResultPatternCost(P.getChild(I), CGP); return Cost; } @@ -72,8 +72,8 @@ static unsigned getResultPatternSize(TreePatternNode &P, if (Op->isSubClassOf("Instruction")) { Cost += Op->getValueAsInt("CodeSize"); } - for (unsigned i = 0, e = P.getNumChildren(); i != e; ++i) - Cost += getResultPatternSize(P.getChild(i), CGP); + for (unsigned I = 0, E = P.getNumChildren(); I != E; ++I) + Cost += getResultPatternSize(P.getChild(I), CGP); return Cost; } diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp index 537bee55978bd6c..a6c0d09f69ba342 100644 --- 
a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp @@ -105,7 +105,7 @@ int DFAPacketizerEmitter::collectAllFuncUnits( for (const CodeGenProcModel *Model : ProcModels) ProcItinList.insert(Model->ItinsDef); - int totalFUs = 0; + int TotalFUs = 0; // Parse functional units for all the itineraries. for (const Record *Proc : ProcItinList) { std::vector FUs = Proc->getValueAsListOfDefs("FU"); @@ -123,10 +123,10 @@ int DFAPacketizerEmitter::collectAllFuncUnits( LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x" << Twine::utohexstr(FuncResources)); } - totalFUs += numFUs; + TotalFUs += numFUs; LLVM_DEBUG(dbgs() << "\n"); } - return totalFUs; + return TotalFUs; } int DFAPacketizerEmitter::collectAllComboFuncs( @@ -136,19 +136,19 @@ int DFAPacketizerEmitter::collectAllComboFuncs( LLVM_DEBUG(dbgs() << "collectAllComboFuncs"); LLVM_DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n"); - int numCombos = 0; - for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) { - const Record *Func = ComboFuncList[i]; + int NumCombos = 0; + for (unsigned I = 0, N = ComboFuncList.size(); I < N; ++I) { + const Record *Func = ComboFuncList[I]; std::vector FUs = Func->getValueAsListOfDefs("CFD"); - LLVM_DEBUG(dbgs() << " CFD:" << i << " (" << FUs.size() << " combo FUs) " + LLVM_DEBUG(dbgs() << " CFD:" << I << " (" << FUs.size() << " combo FUs) " << Func->getName() << "\n"); // Convert macros to bits for each stage. 
- for (unsigned j = 0, N = FUs.size(); j < N; ++j) { - assert((j < DFA_MAX_RESOURCES) && + for (unsigned J = 0, N = FUs.size(); J < N; ++J) { + assert((J < DFA_MAX_RESOURCES) && "Exceeded maximum number of DFA resources"); - const Record *FuncData = FUs[j]; + const Record *FuncData = FUs[J]; const Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc"); const std::vector FuncList = FuncData->getValueAsListOfDefs("FuncList"); @@ -165,13 +165,13 @@ int DFAPacketizerEmitter::collectAllComboFuncs( ComboResources |= FuncResources; } ComboBitToBitsMap[ComboBit] = ComboResources; - numCombos++; + NumCombos++; LLVM_DEBUG(dbgs() << " => combo bits: " << ComboFuncName << ":0x" << Twine::utohexstr(ComboBit) << " = 0x" << Twine::utohexstr(ComboResources) << "\n"); } } - return numCombos; + return NumCombos; } ResourceVector @@ -271,7 +271,7 @@ void DFAPacketizerEmitter::emitForItineraries( // Given a resource state, return all resource states by applying // InsnClass. - auto applyInsnClass = [&](const ResourceVector &InsnClass, + auto ApplyInsnClass = [&](const ResourceVector &InsnClass, NfaStateTy State) -> std::deque { std::deque V(1, State); // Apply every stage in the class individually. @@ -304,7 +304,7 @@ void DFAPacketizerEmitter::emitForItineraries( // Given a resource state, return a quick (conservative) guess as to whether // InsnClass can be applied. This is a filter for the more heavyweight - // applyInsnClass. + // ApplyInsnClass. 
auto canApplyInsnClass = [](const ResourceVector &InsnClass, NfaStateTy State) -> bool { for (NfaStateTy Resources : InsnClass) { @@ -325,7 +325,7 @@ void DFAPacketizerEmitter::emitForItineraries( if (!canApplyInsnClass(Resources, State)) continue; unsigned ResourcesID = UniqueResources.idFor(Resources); - for (uint64_t NewState : applyInsnClass(Resources, State)) { + for (uint64_t NewState : ApplyInsnClass(Resources, State)) { if (SeenStates.emplace(NewState).second) Worklist.emplace_back(NewState); Emitter.addTransition(State, NewState, ResourcesID); diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 8594233244638d0..8bebe608eece47d 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -61,7 +61,7 @@ struct DXILOperationDesc { ShaderStages; // shader stages to which this applies, empty for all. int OverloadParamIndex; // Index of parameter with overload type. // -1 : no overload types - SmallVector counters; // counters for this inst. + SmallVector Counters; // counters for this inst. DXILOperationDesc(const Record *); }; } // end anonymous namespace @@ -69,7 +69,7 @@ struct DXILOperationDesc { /// In-place sort TableGen records of class with a field /// Version dxil_version /// in the ascending version order. -static void AscendingSortByVersion(std::vector &Recs) { +static void ascendingSortByVersion(std::vector &Recs) { sort(Recs, [](const Record *RecA, const Record *RecB) { unsigned RecAMaj = RecA->getValueAsDef("dxil_version")->getValueAsInt("Major"); @@ -125,8 +125,8 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { // the comment before the definition of class LLVMMatchType in // llvm/IR/Intrinsics.td OverloadParamIndex = -1; // A sigil meaning none. 
- for (unsigned i = 0; i < ParamTypeRecsSize; i++) { - const Record *TR = ParamTypeRecs[i]; + for (unsigned I = 0; I < ParamTypeRecsSize; I++) { + const Record *TR = ParamTypeRecs[I]; // Track operation parameter indices of any overload types if (TR->getValueAsInt("isOverload")) { if (OverloadParamIndex != -1) { @@ -137,7 +137,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { // Keep the earliest parameter index we see, but if it was the return type // overwrite it with the first overloaded argument. if (OverloadParamIndex <= 0) - OverloadParamIndex = i; + OverloadParamIndex = I; } OpTypes.emplace_back(TR); } @@ -146,7 +146,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { std::vector Recs = R->getValueAsListOfDefs("overloads"); // Sort records in ascending order of DXIL version - AscendingSortByVersion(Recs); + ascendingSortByVersion(Recs); for (const Record *CR : Recs) { OverloadRecs.push_back(CR); @@ -161,7 +161,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { } // Sort records in ascending order of DXIL version - AscendingSortByVersion(Recs); + ascendingSortByVersion(Recs); for (const Record *CR : Recs) { StageRecs.push_back(CR); @@ -171,7 +171,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) { Recs = R->getValueAsListOfDefs("attributes"); // Sort records in ascending order of DXIL version - AscendingSortByVersion(Recs); + ascendingSortByVersion(Recs); for (const Record *CR : Recs) { AttrRecs.push_back(CR); @@ -286,7 +286,7 @@ static std::string getOverloadMaskString(ArrayRef Recs) { if (Recs.empty()) { MaskString.append("{{1, 0}, OverloadKind::UNDEFINED}}"); } else { - for (auto Rec : Recs) { + for (const auto *Rec : Recs) { unsigned Major = Rec->getValueAsDef("dxil_version")->getValueAsInt("Major"); unsigned Minor = @@ -332,7 +332,7 @@ static std::string getStageMaskString(ArrayRef Recs) { "operation must be specified"); } - for (auto Rec : Recs) { + for (const auto *Rec : Recs) { unsigned Major = 
Rec->getValueAsDef("dxil_version")->getValueAsInt("Major"); unsigned Minor = Rec->getValueAsDef("dxil_version")->getValueAsInt("Minor"); MaskString.append(Prefix) @@ -370,7 +370,7 @@ static std::string getAttributeMaskString(ArrayRef Recs) { std::string Prefix = ""; MaskString.append("{"); - for (auto Rec : Recs) { + for (const auto *Rec : Recs) { unsigned Major = Rec->getValueAsDef("dxil_version")->getValueAsInt("Major"); unsigned Minor = Rec->getValueAsDef("dxil_version")->getValueAsInt("Minor"); MaskString.append(Prefix) @@ -576,21 +576,21 @@ static void emitDXILOperationTableDataStructs(const RecordKeeper &Records, size_t ShaderKindCount = ShaderKindRecs.size(); uint64_t ShaderKindTySz = PowerOf2Ceil(ShaderKindRecs.size() + 1); OS << "enum ShaderKind : uint" << ShaderKindTySz << "_t {\n"; - const std::string allStages("all_stages"); - const std::string removed("removed"); - int shiftVal = 1; - for (auto R : ShaderKindRecs) { + const std::string AllStages("all_stages"); + const std::string Removed("removed"); + int ShiftVal = 1; + for (const auto *R : ShaderKindRecs) { auto Name = R->getName(); - if (Name.compare(removed) == 0) { + if (Name.compare(Removed) == 0) { OS << " " << Name << " = 0, // Pseudo-stage indicating op not supported in any " "stage\n"; - } else if (Name.compare(allStages) == 0) { + } else if (Name.compare(AllStages) == 0) { OS << " " << Name << " = 0x" << utohexstr(((1 << ShaderKindCount) - 1), false, 0) << ", // Pseudo-stage indicating op is supported in all stages\n"; - } else if (Name.compare(allStages)) { - OS << " " << Name << " = 1 << " << std::to_string(shiftVal++) << ",\n"; + } else if (Name.compare(AllStages)) { + OS << " " << Name << " = 1 << " << std::to_string(ShiftVal++) << ",\n"; } } OS << "}; // enum ShaderKind\n\n"; @@ -599,7 +599,7 @@ static void emitDXILOperationTableDataStructs(const RecordKeeper &Records, /// Entry function call that invokes the functionality of this TableGen backend /// \param Records TableGen records of 
DXIL Operations defined in DXIL.td /// \param OS output stream -static void EmitDXILOperation(const RecordKeeper &Records, raw_ostream &OS) { +static void emitDxilOperation(const RecordKeeper &Records, raw_ostream &OS) { OS << "// Generated code, do not edit.\n"; OS << "\n"; // Get all DXIL Ops property records @@ -631,5 +631,5 @@ static void EmitDXILOperation(const RecordKeeper &Records, raw_ostream &OS) { OS << "#endif\n\n"; } -static TableGen::Emitter::Opt X("gen-dxil-operation", EmitDXILOperation, +static TableGen::Emitter::Opt X("gen-dxil-operation", emitDxilOperation, "Generate DXIL operation information"); diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp index 9dc29d8262fa2ca..fd815f4a31dad8d 100644 --- a/llvm/utils/TableGen/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -46,7 +46,7 @@ class IfDefScope { // Generate enum class. Entries are emitted in the order in which they appear // in the `Records` vector. -static void GenerateEnumClass(ArrayRef Records, raw_ostream &OS, +static void generateEnumClass(ArrayRef Records, raw_ostream &OS, StringRef Enum, StringRef Prefix, const DirectiveLanguage &DirLang, bool ExportEnums) { @@ -79,7 +79,7 @@ static void GenerateEnumClass(ArrayRef Records, raw_ostream &OS, // Generate enums for values that clauses can take. // Also generate function declarations for getName(StringRef Str). 
-static void GenerateEnumClauseVal(ArrayRef Records, +static void generateEnumClauseVal(ArrayRef Records, raw_ostream &OS, const DirectiveLanguage &DirLang, std::string &EnumHelperFuncs) { @@ -121,13 +121,13 @@ static void GenerateEnumClauseVal(ArrayRef Records, } } -static bool HasDuplicateClauses(ArrayRef Clauses, +static bool hasDuplicateClauses(ArrayRef Clauses, const Directive &Directive, StringSet<> &CrtClauses) { bool HasError = false; for (const VersionedClause VerClause : Clauses) { - const auto insRes = CrtClauses.insert(VerClause.getClause().getName()); - if (!insRes.second) { + const auto InsRes = CrtClauses.insert(VerClause.getClause().getName()); + if (!InsRes.second) { PrintError("Clause " + VerClause.getClause().getRecordName() + " already defined on directive " + Directive.getRecordName()); HasError = true; @@ -140,20 +140,20 @@ static bool HasDuplicateClauses(ArrayRef Clauses, // three allowed list. Also, since required implies allowed, clauses cannot // appear in both the allowedClauses and requiredClauses lists. static bool -HasDuplicateClausesInDirectives(ArrayRef Directives) { +hasDuplicateClausesInDirectives(ArrayRef Directives) { bool HasDuplicate = false; for (const Directive Dir : Directives) { StringSet<> Clauses; // Check for duplicates in the three allowed lists. 
- if (HasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) || - HasDuplicateClauses(Dir.getAllowedOnceClauses(), Dir, Clauses) || - HasDuplicateClauses(Dir.getAllowedExclusiveClauses(), Dir, Clauses)) { + if (hasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) || + hasDuplicateClauses(Dir.getAllowedOnceClauses(), Dir, Clauses) || + hasDuplicateClauses(Dir.getAllowedExclusiveClauses(), Dir, Clauses)) { HasDuplicate = true; } // Check for duplicate between allowedClauses and required Clauses.clear(); - if (HasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) || - HasDuplicateClauses(Dir.getRequiredClauses(), Dir, Clauses)) { + if (hasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) || + hasDuplicateClauses(Dir.getRequiredClauses(), Dir, Clauses)) { HasDuplicate = true; } if (HasDuplicate) @@ -173,11 +173,11 @@ bool DirectiveLanguage::HasValidityErrors() const { return true; } - return HasDuplicateClausesInDirectives(getDirectives()); + return hasDuplicateClausesInDirectives(getDirectives()); } // Count the maximum number of leaf constituents per construct. -static size_t GetMaxLeafCount(const DirectiveLanguage &DirLang) { +static size_t getMaxLeafCount(const DirectiveLanguage &DirLang) { size_t MaxCount = 0; for (const Directive D : DirLang.getDirectives()) MaxCount = std::max(MaxCount, D.getLeafConstructs().size()); @@ -186,7 +186,7 @@ static size_t GetMaxLeafCount(const DirectiveLanguage &DirLang) { // Generate the declaration section for the enumeration in the directive // language. 
-static void EmitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { +static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { const auto DirLang = DirectiveLanguage(Records); if (DirLang.HasValidityErrors()) return; @@ -214,29 +214,29 @@ static void EmitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { OS << "\nLLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();\n"; // Emit Directive associations - std::vector associations; - copy_if(DirLang.getAssociations(), std::back_inserter(associations), + std::vector Associations; + copy_if(DirLang.getAssociations(), std::back_inserter(Associations), // Skip the "special" value [](const Record *Def) { return Def->getName() != "AS_FromLeaves"; }); - GenerateEnumClass(associations, OS, "Association", + generateEnumClass(Associations, OS, "Association", /*Prefix=*/"", DirLang, /*ExportEnums=*/false); - GenerateEnumClass(DirLang.getCategories(), OS, "Category", /*Prefix=*/"", + generateEnumClass(DirLang.getCategories(), OS, "Category", /*Prefix=*/"", DirLang, /*ExportEnums=*/false); // Emit Directive enumeration - GenerateEnumClass(DirLang.getDirectives(), OS, "Directive", + generateEnumClass(DirLang.getDirectives(), OS, "Directive", DirLang.getDirectivePrefix(), DirLang, DirLang.hasMakeEnumAvailableInNamespace()); // Emit Clause enumeration - GenerateEnumClass(DirLang.getClauses(), OS, "Clause", + generateEnumClass(DirLang.getClauses(), OS, "Clause", DirLang.getClausePrefix(), DirLang, DirLang.hasMakeEnumAvailableInNamespace()); // Emit ClauseVal enumeration std::string EnumHelperFuncs; - GenerateEnumClauseVal(DirLang.getClauses(), OS, DirLang, EnumHelperFuncs); + generateEnumClauseVal(DirLang.getClauses(), OS, DirLang, EnumHelperFuncs); // Generic function signatures OS << "\n"; @@ -259,7 +259,7 @@ static void EmitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { << "Clause C, unsigned Version);\n"; OS << "\n"; OS << "constexpr std::size_t getMaxLeafCount() { return " - << 
GetMaxLeafCount(DirLang) << "; }\n"; + << getMaxLeafCount(DirLang) << "; }\n"; OS << "LLVM_ABI Association getDirectiveAssociation(Directive D);\n"; OS << "LLVM_ABI Category getDirectiveCategory(Directive D);\n"; if (EnumHelperFuncs.length() > 0) { @@ -277,7 +277,7 @@ static void EmitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) { } // Generate function implementation for getName(StringRef Str) -static void GenerateGetName(ArrayRef Records, raw_ostream &OS, +static void generateGetName(ArrayRef Records, raw_ostream &OS, StringRef Enum, const DirectiveLanguage &DirLang, StringRef Prefix) { OS << "\n"; @@ -300,11 +300,11 @@ static void GenerateGetName(ArrayRef Records, raw_ostream &OS, } // Generate function implementation for getKind(StringRef Str) -static void GenerateGetKind(ArrayRef Records, raw_ostream &OS, +static void generateGetKind(ArrayRef Records, raw_ostream &OS, StringRef Enum, const DirectiveLanguage &DirLang, StringRef Prefix, bool ImplicitAsUnknown) { - auto DefaultIt = find_if( + const auto *DefaultIt = find_if( Records, [](const Record *R) { return R->getValueAsBit("isDefault"); }); if (DefaultIt == Records.end()) { @@ -334,7 +334,7 @@ static void GenerateGetKind(ArrayRef Records, raw_ostream &OS, } // Generate function implementation for getKind(StringRef Str) -static void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang, +static void generateGetKindClauseVal(const DirectiveLanguage &DirLang, raw_ostream &OS) { for (const Clause C : DirLang.getClauses()) { const auto &ClauseVals = C.getClauseVals(); @@ -389,7 +389,7 @@ static void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang, } } -static void GenerateCaseForVersionedClauses(ArrayRef Clauses, +static void generateCaseForVersionedClauses(ArrayRef Clauses, raw_ostream &OS, StringRef DirectiveName, const DirectiveLanguage &DirLang, @@ -406,7 +406,7 @@ static void GenerateCaseForVersionedClauses(ArrayRef Clauses, } } -static std::string GetDirectiveName(const 
DirectiveLanguage &DirLang, +static std::string getDirectiveName(const DirectiveLanguage &DirLang, const Record *Rec) { Directive Dir(Rec); return (Twine("llvm::") + DirLang.getCppNamespace() + @@ -414,12 +414,12 @@ static std::string GetDirectiveName(const DirectiveLanguage &DirLang, .str(); } -static std::string GetDirectiveType(const DirectiveLanguage &DirLang) { +static std::string getDirectiveType(const DirectiveLanguage &DirLang) { return (Twine("llvm::") + DirLang.getCppNamespace() + "::Directive").str(); } // Generate the isAllowedClauseForDirective function implementation. -static void GenerateIsAllowedClause(const DirectiveLanguage &DirLang, +static void generateIsAllowedClause(const DirectiveLanguage &DirLang, raw_ostream &OS) { OS << "\n"; OS << "bool llvm::" << DirLang.getCppNamespace() @@ -445,16 +445,16 @@ static void GenerateIsAllowedClause(const DirectiveLanguage &DirLang, StringSet<> Cases; - GenerateCaseForVersionedClauses(Dir.getAllowedClauses(), OS, + generateCaseForVersionedClauses(Dir.getAllowedClauses(), OS, Dir.getName(), DirLang, Cases); - GenerateCaseForVersionedClauses(Dir.getAllowedOnceClauses(), OS, + generateCaseForVersionedClauses(Dir.getAllowedOnceClauses(), OS, Dir.getName(), DirLang, Cases); - GenerateCaseForVersionedClauses(Dir.getAllowedExclusiveClauses(), OS, + generateCaseForVersionedClauses(Dir.getAllowedExclusiveClauses(), OS, Dir.getName(), DirLang, Cases); - GenerateCaseForVersionedClauses(Dir.getRequiredClauses(), OS, + generateCaseForVersionedClauses(Dir.getRequiredClauses(), OS, Dir.getName(), DirLang, Cases); OS << " default:\n"; @@ -470,7 +470,7 @@ static void GenerateIsAllowedClause(const DirectiveLanguage &DirLang, OS << "}\n"; // End of function isAllowedClauseForDirective } -static void EmitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS, +static void emitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS, StringRef TableName) { // The leaf constructs are emitted in a form of a 2D table, where 
each // row corresponds to a directive (and there is a row for each directive). @@ -498,7 +498,7 @@ static void EmitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS, DirId.insert(std::make_pair(Rec, Idx)); using LeafList = std::vector; - int MaxLeafCount = GetMaxLeafCount(DirLang); + int MaxLeafCount = getMaxLeafCount(DirLang); // The initial leaf table, rows order is same as directive order. std::vector LeafTable(Directives.size()); @@ -560,19 +560,19 @@ static void EmitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS, // type is `int` (by default). The code above uses `int` to store directive // ids, so make sure that we catch it when something changes in the // underlying type. - std::string DirectiveType = GetDirectiveType(DirLang); + std::string DirectiveType = getDirectiveType(DirLang); OS << "\nstatic_assert(sizeof(" << DirectiveType << ") == sizeof(int));\n"; OS << "[[maybe_unused]] static const " << DirectiveType << ' ' << TableName << "[][" << MaxLeafCount + 2 << "] = {\n"; for (size_t I = 0, E = Directives.size(); I != E; ++I) { auto &Leaves = LeafTable[Ordering[I]]; - OS << " {" << GetDirectiveName(DirLang, Directives[Leaves[0]]); + OS << " {" << getDirectiveName(DirLang, Directives[Leaves[0]]); OS << ", static_cast<" << DirectiveType << ">(" << Leaves[1] << "),"; for (size_t I = 2, E = Leaves.size(); I != E; ++I) { int Idx = Leaves[I]; if (Idx >= 0) - OS << ' ' << GetDirectiveName(DirLang, Directives[Leaves[I]]) << ','; + OS << ' ' << getDirectiveName(DirLang, Directives[Leaves[I]]) << ','; else OS << " static_cast<" << DirectiveType << ">(-1),"; } @@ -600,7 +600,7 @@ static void EmitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS, OS << "\n};\n"; } -static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang, +static void generateGetDirectiveAssociation(const DirectiveLanguage &DirLang, raw_ostream &OS) { enum struct Association { None = 0, // None should be the smallest value. 
@@ -613,10 +613,10 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang, Invalid, }; - ArrayRef associations = DirLang.getAssociations(); + ArrayRef Associations = DirLang.getAssociations(); - auto getAssocValue = [](StringRef name) -> Association { - return StringSwitch(name) + auto GetAssocValue = [](StringRef Name) -> Association { + return StringSwitch(Name) .Case("AS_Block", Association::Block) .Case("AS_Declaration", Association::Declaration) .Case("AS_Delimited", Association::Delimited) @@ -627,24 +627,24 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang, .Default(Association::Invalid); }; - auto getAssocName = [&](Association A) -> StringRef { + auto GetAssocName = [&](Association A) -> StringRef { if (A != Association::Invalid && A != Association::FromLeaves) { - auto F = find_if(associations, [&](const Record *R) { - return getAssocValue(R->getName()) == A; + const auto *F = find_if(Associations, [&](const Record *R) { + return GetAssocValue(R->getName()) == A; }); - if (F != associations.end()) + if (F != Associations.end()) return (*F)->getValueAsString("name"); // enum name } llvm_unreachable("Unexpected association value"); }; - auto errorPrefixFor = [&](Directive D) -> std::string { + auto ErrorPrefixFor = [&](Directive D) -> std::string { return (Twine("Directive '") + D.getName() + "' in namespace '" + DirLang.getCppNamespace() + "' ") .str(); }; - auto reduce = [&](Association A, Association B) -> Association { + auto Reduce = [&](Association A, Association B) -> Association { if (A > B) std::swap(A, B); @@ -663,14 +663,14 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang, DenseMap AsMap; - auto compAssocImpl = [&](const Record *R, auto &&Self) -> Association { + auto CompAssocImpl = [&](const Record *R, auto &&Self) -> Association { if (auto F = AsMap.find(R); F != AsMap.end()) return F->second; Directive D(R); - Association AS = 
getAssocValue(D.getAssociation()->getName()); + Association AS = GetAssocValue(D.getAssociation()->getName()); if (AS == Association::Invalid) { - PrintFatalError(errorPrefixFor(D) + + PrintFatalError(ErrorPrefixFor(D) + "has an unrecognized value for association: '" + D.getAssociation()->getName() + "'"); } @@ -679,22 +679,22 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang, return AS; } // Compute the association from leaf constructs. - std::vector leaves = D.getLeafConstructs(); - if (leaves.empty()) { + std::vector Leaves = D.getLeafConstructs(); + if (Leaves.empty()) { errs() << D.getName() << '\n'; - PrintFatalError(errorPrefixFor(D) + + PrintFatalError(ErrorPrefixFor(D) + "requests association to be computed from leaves, " "but it has no leaves"); } - Association Result = Self(leaves[0], Self); - for (int I = 1, E = leaves.size(); I < E; ++I) { - Association A = Self(leaves[I], Self); - Association R = reduce(Result, A); + Association Result = Self(Leaves[0], Self); + for (int I = 1, E = Leaves.size(); I < E; ++I) { + Association A = Self(Leaves[I], Self); + Association R = Reduce(Result, A); if (R == Association::Invalid) { - PrintFatalError(errorPrefixFor(D) + + PrintFatalError(ErrorPrefixFor(D) + "has leaves with incompatible association values: " + - getAssocName(A) + " and " + getAssocName(R)); + GetAssocName(A) + " and " + GetAssocName(R)); } Result = R; } @@ -706,11 +706,11 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang, }; for (const Record *R : DirLang.getDirectives()) - compAssocImpl(R, compAssocImpl); // Updates AsMap. + CompAssocImpl(R, CompAssocImpl); // Updates AsMap. 
OS << '\n'; - auto getQualifiedName = [&](StringRef Formatted) -> std::string { + auto GetQualifiedName = [&](StringRef Formatted) -> std::string { return (Twine("llvm::") + DirLang.getCppNamespace() + "::Directive::" + DirLang.getDirectivePrefix() + Formatted) .str(); @@ -727,9 +727,9 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang, for (const Record *R : DirLang.getDirectives()) { if (auto F = AsMap.find(R); F != AsMap.end()) { Directive Dir(R); - OS << " case " << getQualifiedName(Dir.getFormattedName()) << ":\n"; + OS << " case " << GetQualifiedName(Dir.getFormattedName()) << ":\n"; OS << " return " << AssociationTypeName - << "::" << getAssocName(F->second) << ";\n"; + << "::" << GetAssocName(F->second) << ";\n"; } } OS << " } // switch (Dir)\n"; @@ -737,7 +737,7 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang, OS << "}\n"; } -static void GenerateGetDirectiveCategory(const DirectiveLanguage &DirLang, +static void generateGetDirectiveCategory(const DirectiveLanguage &DirLang, raw_ostream &OS) { std::string LangNamespace = "llvm::" + DirLang.getCppNamespace().str(); std::string CategoryTypeName = LangNamespace + "::Category"; @@ -745,12 +745,12 @@ static void GenerateGetDirectiveCategory(const DirectiveLanguage &DirLang, OS << '\n'; OS << CategoryTypeName << ' ' << LangNamespace << "::getDirectiveCategory(" - << GetDirectiveType(DirLang) << " Dir) {\n"; + << getDirectiveType(DirLang) << " Dir) {\n"; OS << " switch (Dir) {\n"; for (const Record *R : DirLang.getDirectives()) { Directive D(R); - OS << " case " << GetDirectiveName(DirLang, R) << ":\n"; + OS << " case " << getDirectiveName(DirLang, R) << ":\n"; OS << " return " << CategoryNamespace << D.getCategory()->getValueAsString("name") << ";\n"; } @@ -760,7 +760,7 @@ static void GenerateGetDirectiveCategory(const DirectiveLanguage &DirLang, } // Generate a simple enum set with the give clauses. 
-static void GenerateClauseSet(ArrayRef Clauses, raw_ostream &OS, +static void generateClauseSet(ArrayRef Clauses, raw_ostream &OS, StringRef ClauseSetPrefix, const Directive &Dir, const DirectiveLanguage &DirLang) { @@ -778,7 +778,7 @@ static void GenerateClauseSet(ArrayRef Clauses, raw_ostream &OS, } // Generate an enum set for the 4 kinds of clauses linked to a directive. -static void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang, +static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_SETS", OS); @@ -796,13 +796,13 @@ static void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang, OS << "\n"; OS << " // Sets for " << Dir.getName() << "\n"; - GenerateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir, + generateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir, DirLang); - GenerateClauseSet(Dir.getAllowedOnceClauses(), OS, "allowedOnceClauses_", + generateClauseSet(Dir.getAllowedOnceClauses(), OS, "allowedOnceClauses_", Dir, DirLang); - GenerateClauseSet(Dir.getAllowedExclusiveClauses(), OS, + generateClauseSet(Dir.getAllowedExclusiveClauses(), OS, "allowedExclusiveClauses_", Dir, DirLang); - GenerateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir, + generateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir, DirLang); } @@ -816,7 +816,7 @@ static void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang, // Generate a map of directive (key) with DirectiveClauses struct as values. // The struct holds the 4 sets of enumeration for the 4 kinds of clauses // allowances (allowed, allowed once, allowed exclusive and required). 
-static void GenerateDirectiveClauseMap(const DirectiveLanguage &DirLang, +static void generateDirectiveClauseMap(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_MAP", OS); @@ -850,7 +850,7 @@ static void GenerateDirectiveClauseMap(const DirectiveLanguage &DirLang, // If the clause does not hold a value, an EMPTY_CLASS is used. // If the clause class is generic then a WRAPPER_CLASS is used. When the value // is optional, the value class is wrapped into a std::optional. -static void GenerateFlangClauseParserClass(const DirectiveLanguage &DirLang, +static void generateFlangClauseParserClass(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_CLASSES", OS); @@ -877,7 +877,7 @@ static void GenerateFlangClauseParserClass(const DirectiveLanguage &DirLang, } // Generate a list of the different clause classes for Flang. -static void GenerateFlangClauseParserClassList(const DirectiveLanguage &DirLang, +static void generateFlangClauseParserClassList(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_CLASSES_LIST", OS); @@ -890,7 +890,7 @@ static void GenerateFlangClauseParserClassList(const DirectiveLanguage &DirLang, } // Generate dump node list for the clauses holding a generic class name. -static void GenerateFlangClauseDump(const DirectiveLanguage &DirLang, +static void generateFlangClauseDump(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_FLANG_DUMP_PARSE_TREE_CLAUSES", OS); @@ -904,7 +904,7 @@ static void GenerateFlangClauseDump(const DirectiveLanguage &DirLang, // Generate Unparse functions for clauses classes in the Flang parse-tree // If the clause is a non-generic class, no entry is generated. 
-static void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang, +static void generateFlangClauseUnparse(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_FLANG_CLAUSE_UNPARSE", OS); @@ -955,7 +955,7 @@ static void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang, } // Generate check in the Enter functions for clauses classes. -static void GenerateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang, +static void generateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_FLANG_CLAUSE_CHECK_ENTER", OS); @@ -969,7 +969,7 @@ static void GenerateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang, // Generate the mapping for clauses between the parser class and the // corresponding clause Kind -static void GenerateFlangClauseParserKindMap(const DirectiveLanguage &DirLang, +static void generateFlangClauseParserKindMap(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_KIND_MAP", OS); @@ -996,7 +996,7 @@ static bool compareClauseName(const Record *R1, const Record *R2) { } // Generate the parser for the clauses. 
-static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang, +static void generateFlangClausesParser(const DirectiveLanguage &DirLang, raw_ostream &OS) { std::vector Clauses = DirLang.getClauses(); // Sort clauses in reverse alphabetical order so with clauses with same @@ -1004,8 +1004,8 @@ static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang, sort(Clauses, compareClauseName); IfDefScope Scope("GEN_FLANG_CLAUSES_PARSER", OS); OS << "\n"; - unsigned index = 0; - unsigned lastClauseIndex = Clauses.size() - 1; + unsigned Index = 0; + unsigned LastClauseIndex = Clauses.size() - 1; OS << "TYPE_PARSER(\n"; for (const Clause Clause : Clauses) { if (Clause.getAliases().empty()) { @@ -1013,8 +1013,8 @@ static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang, } else { OS << " (" << "\"" << Clause.getName() << "\"_tok"; - for (StringRef alias : Clause.getAliases()) { - OS << " || \"" << alias << "\"_tok"; + for (StringRef Alias : Clause.getAliases()) { + OS << " || \"" << Alias << "\"_tok"; } OS << ")"; } @@ -1024,10 +1024,10 @@ static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang, << "::" << Clause.getFormattedParserClassName() << ">("; if (Clause.getFlangClass().empty()) { OS << "))"; - if (index != lastClauseIndex) + if (Index != LastClauseIndex) OS << " ||"; OS << "\n"; - ++index; + ++Index; continue; } @@ -1064,38 +1064,38 @@ static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang, if (Clause.isValueOptional()) // close maybe(. 
OS << ")"; OS << "))"; - if (index != lastClauseIndex) + if (Index != LastClauseIndex) OS << " ||"; OS << "\n"; - ++index; + ++Index; } OS << ")\n"; } // Generate the implementation section for the enumeration in the directive // language -static void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang, +static void emitDirectivesFlangImpl(const DirectiveLanguage &DirLang, raw_ostream &OS) { - GenerateDirectiveClauseSets(DirLang, OS); + generateDirectiveClauseSets(DirLang, OS); - GenerateDirectiveClauseMap(DirLang, OS); + generateDirectiveClauseMap(DirLang, OS); - GenerateFlangClauseParserClass(DirLang, OS); + generateFlangClauseParserClass(DirLang, OS); - GenerateFlangClauseParserClassList(DirLang, OS); + generateFlangClauseParserClassList(DirLang, OS); - GenerateFlangClauseDump(DirLang, OS); + generateFlangClauseDump(DirLang, OS); - GenerateFlangClauseUnparse(DirLang, OS); + generateFlangClauseUnparse(DirLang, OS); - GenerateFlangClauseCheckPrototypes(DirLang, OS); + generateFlangClauseCheckPrototypes(DirLang, OS); - GenerateFlangClauseParserKindMap(DirLang, OS); + generateFlangClauseParserKindMap(DirLang, OS); - GenerateFlangClausesParser(DirLang, OS); + generateFlangClausesParser(DirLang, OS); } -static void GenerateClauseClassMacro(const DirectiveLanguage &DirLang, +static void generateClauseClassMacro(const DirectiveLanguage &DirLang, raw_ostream &OS) { // Generate macros style information for legacy code in clang IfDefScope Scope("GEN_CLANG_CLAUSE_CLASS", OS); @@ -1163,63 +1163,63 @@ static void GenerateClauseClassMacro(const DirectiveLanguage &DirLang, // Generate the implemenation for the enumeration in the directive // language. This code can be included in library. 
-void EmitDirectivesBasicImpl(const DirectiveLanguage &DirLang, +void emitDirectivesBasicImpl(const DirectiveLanguage &DirLang, raw_ostream &OS) { IfDefScope Scope("GEN_DIRECTIVES_IMPL", OS); OS << "\n#include \"llvm/Support/ErrorHandling.h\"\n"; // getDirectiveKind(StringRef Str) - GenerateGetKind(DirLang.getDirectives(), OS, "Directive", DirLang, + generateGetKind(DirLang.getDirectives(), OS, "Directive", DirLang, DirLang.getDirectivePrefix(), /*ImplicitAsUnknown=*/false); // getDirectiveName(Directive Kind) - GenerateGetName(DirLang.getDirectives(), OS, "Directive", DirLang, + generateGetName(DirLang.getDirectives(), OS, "Directive", DirLang, DirLang.getDirectivePrefix()); // getClauseKind(StringRef Str) - GenerateGetKind(DirLang.getClauses(), OS, "Clause", DirLang, + generateGetKind(DirLang.getClauses(), OS, "Clause", DirLang, DirLang.getClausePrefix(), /*ImplicitAsUnknown=*/true); // getClauseName(Clause Kind) - GenerateGetName(DirLang.getClauses(), OS, "Clause", DirLang, + generateGetName(DirLang.getClauses(), OS, "Clause", DirLang, DirLang.getClausePrefix()); // getKind(StringRef Str) - GenerateGetKindClauseVal(DirLang, OS); + generateGetKindClauseVal(DirLang, OS); // isAllowedClauseForDirective(Directive D, Clause C, unsigned Version) - GenerateIsAllowedClause(DirLang, OS); + generateIsAllowedClause(DirLang, OS); // getDirectiveAssociation(Directive D) - GenerateGetDirectiveAssociation(DirLang, OS); + generateGetDirectiveAssociation(DirLang, OS); // getDirectiveCategory(Directive D) - GenerateGetDirectiveCategory(DirLang, OS); + generateGetDirectiveCategory(DirLang, OS); // Leaf table for getLeafConstructs, etc. - EmitLeafTable(DirLang, OS, "LeafConstructTable"); + emitLeafTable(DirLang, OS, "LeafConstructTable"); } // Generate the implemenation section for the enumeration in the directive // language. 
-static void EmitDirectivesImpl(const RecordKeeper &Records, raw_ostream &OS) { +static void emitDirectivesImpl(const RecordKeeper &Records, raw_ostream &OS) { const auto DirLang = DirectiveLanguage(Records); if (DirLang.HasValidityErrors()) return; - EmitDirectivesFlangImpl(DirLang, OS); + emitDirectivesFlangImpl(DirLang, OS); - GenerateClauseClassMacro(DirLang, OS); + generateClauseClassMacro(DirLang, OS); - EmitDirectivesBasicImpl(DirLang, OS); + emitDirectivesBasicImpl(DirLang, OS); } static TableGen::Emitter::Opt - X("gen-directive-decl", EmitDirectivesDecl, + X("gen-directive-decl", emitDirectivesDecl, "Generate directive related declaration code (header file)"); static TableGen::Emitter::Opt - Y("gen-directive-impl", EmitDirectivesImpl, + Y("gen-directive-impl", emitDirectivesImpl, "Generate directive related implementation code"); diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp index eb15392272a3f31..70d835e699ffeff 100644 --- a/llvm/utils/TableGen/DisassemblerEmitter.cpp +++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp @@ -95,7 +95,7 @@ using namespace llvm::X86Disassembler; /// X86RecognizableInstr.cpp contains the implementation for a single /// instruction. 
-static void EmitDisassembler(const RecordKeeper &Records, raw_ostream &OS) { +static void emitDisassembler(const RecordKeeper &Records, raw_ostream &OS) { const CodeGenTarget Target(Records); emitSourceFileHeader(" * " + Target.getName().str() + " Disassembler", OS); @@ -132,5 +132,5 @@ static void EmitDisassembler(const RecordKeeper &Records, raw_ostream &OS) { cl::OptionCategory DisassemblerEmitterCat("Options for -gen-disassembler"); -static TableGen::Emitter::Opt X("gen-disassembler", EmitDisassembler, +static TableGen::Emitter::Opt X("gen-disassembler", emitDisassembler, "Generate disassembler"); diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp index cd7a140bb23143b..86e8378ad5ac5d6 100644 --- a/llvm/utils/TableGen/OptionParserEmitter.cpp +++ b/llvm/utils/TableGen/OptionParserEmitter.cpp @@ -26,7 +26,7 @@ static std::string getOptionName(const Record &R) { return std::string(R.getValueAsString("EnumName")); } -static raw_ostream &write_cstring(raw_ostream &OS, llvm::StringRef Str) { +static raw_ostream &writeCstring(raw_ostream &OS, llvm::StringRef Str) { OS << '"'; OS.write_escaped(Str); OS << '"'; @@ -117,7 +117,7 @@ struct SimpleEnumValueTable { OS << "static const SimpleEnumValue " << ValueTableName << "[] = {\n"; for (unsigned I = 0, E = Values.size(); I != E; ++I) { OS << "{"; - write_cstring(OS, Values[I]); + writeCstring(OS, Values[I]); OS << ","; OS << "static_cast("; emitScopedNormalizedValue(OS, NormalizedValues[I]); @@ -190,7 +190,7 @@ static MarshallingInfo createMarshallingInfo(const Record &R) { return Ret; } -static void EmitHelpTextsForVariants( +static void emitHelpTextsForVariants( raw_ostream &OS, std::vector, StringRef>> HelpTextsForVariants) { // OptTable must be constexpr so it uses std::arrays with these capacities. 
@@ -235,7 +235,7 @@ static void EmitHelpTextsForVariants( OS << "}}, "; if (Help.size()) - write_cstring(OS, Help); + writeCstring(OS, Help); else OS << "nullptr"; OS << ")"; @@ -249,7 +249,7 @@ static void EmitHelpTextsForVariants( /// OptionParserEmitter - This tablegen backend takes an input .td file /// describing a list of options and emits a data structure for parsing and /// working with those options when given an input command line. -static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { +static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { // Get the option groups and options. ArrayRef Groups = Records.getAllDerivedDefinitions("OptionGroup"); @@ -363,12 +363,12 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { if (!isa(R.getValueInit("HelpText"))) { OS << ",\n"; OS << " "; - write_cstring(OS, R.getValueAsString("HelpText")); + writeCstring(OS, R.getValueAsString("HelpText")); } else OS << ", nullptr"; // Not using Visibility specific text for group help. - EmitHelpTextsForVariants(OS, {}); + emitHelpTextsForVariants(OS, {}); // The option meta-variable name (unused). OS << ", nullptr"; @@ -387,7 +387,7 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { OS << Prefixes[PrefixKeyT(RPrefixes.begin(), RPrefixes.end())] << ", "; // The option prefixed name. - write_cstring(OS, getOptionPrefixedName(R)); + writeCstring(OS, getOptionPrefixedName(R)); // The option identifier name. 
OS << ", " << getOptionName(R); @@ -464,7 +464,7 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { if (!isa(R.getValueInit("HelpText"))) { OS << ",\n"; OS << " "; - write_cstring(OS, R.getValueAsString("HelpText")); + writeCstring(OS, R.getValueAsString("HelpText")); } else OS << ", nullptr"; @@ -482,19 +482,19 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { HelpTextsForVariants.push_back(std::make_pair( VisibilityNames, VisibilityHelp->getValueAsString("Text"))); } - EmitHelpTextsForVariants(OS, HelpTextsForVariants); + emitHelpTextsForVariants(OS, HelpTextsForVariants); // The option meta-variable name. OS << ", "; if (!isa(R.getValueInit("MetaVarName"))) - write_cstring(OS, R.getValueAsString("MetaVarName")); + writeCstring(OS, R.getValueAsString("MetaVarName")); else OS << "nullptr"; // The option Values. Used for shell autocompletion. OS << ", "; if (!isa(R.getValueInit("Values"))) - write_cstring(OS, R.getValueAsString("Values")); + writeCstring(OS, R.getValueAsString("Values")); else if (!isa(R.getValueInit("ValuesCode"))) { OS << getOptionName(R) << "_Values"; } else @@ -571,5 +571,5 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) { OS << "\n"; } -static TableGen::Emitter::Opt X("gen-opt-parser-defs", EmitOptionParser, +static TableGen::Emitter::Opt X("gen-opt-parser-defs", emitOptionParser, "Generate option definitions"); diff --git a/llvm/utils/TableGen/OptionRSTEmitter.cpp b/llvm/utils/TableGen/OptionRSTEmitter.cpp index 1b4c4cad4f0a451..6eac10e1831f022 100644 --- a/llvm/utils/TableGen/OptionRSTEmitter.cpp +++ b/llvm/utils/TableGen/OptionRSTEmitter.cpp @@ -16,7 +16,7 @@ using namespace llvm; /// This tablegen backend takes an input .td file describing a list of options /// and emits a RST man page. 
-static void EmitOptionRST(const RecordKeeper &Records, raw_ostream &OS) { +static void emitOptionRst(const RecordKeeper &Records, raw_ostream &OS) { llvm::StringMap> OptionsByGroup; // Get the options. @@ -96,5 +96,5 @@ static void EmitOptionRST(const RecordKeeper &Records, raw_ostream &OS) { } } -static TableGen::Emitter::Opt X("gen-opt-rst", EmitOptionRST, +static TableGen::Emitter::Opt X("gen-opt-rst", emitOptionRst, "Generate option RST"); diff --git a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp index 23496a37d5ea1c3..39211aab6f2d1ec 100644 --- a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp @@ -244,13 +244,13 @@ static void emitRISCVExtensionBitmask(const RecordKeeper &RK, raw_ostream &OS) { OS << "#endif\n"; } -static void EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS) { +static void emitRiscvTargetDef(const RecordKeeper &RK, raw_ostream &OS) { emitRISCVExtensions(RK, OS); emitRISCVProfiles(RK, OS); emitRISCVProcs(RK, OS); emitRISCVExtensionBitmask(RK, OS); } -static TableGen::Emitter::Opt X("gen-riscv-target-def", EmitRISCVTargetDef, +static TableGen::Emitter::Opt X("gen-riscv-target-def", emitRiscvTargetDef, "Generate the list of CPUs and extensions for " "RISC-V"); diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 17b84d06fe85738..02c799cb6f14710 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -87,65 +87,65 @@ class SubtargetEmitter { CodeGenSchedModels &SchedModels; std::string Target; - FeatureMapTy Enumeration(raw_ostream &OS); - void EmitSubtargetInfoMacroCalls(raw_ostream &OS); - unsigned FeatureKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap); - unsigned CPUKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap); - void FormItineraryStageString(const std::string &Names, + FeatureMapTy enumeration(raw_ostream &OS); + 
void emitSubtargetInfoMacroCalls(raw_ostream &OS); + unsigned featureKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap); + unsigned cpuKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap); + void formItineraryStageString(const std::string &Names, const Record *ItinData, std::string &ItinString, unsigned &NStages); - void FormItineraryOperandCycleString(const Record *ItinData, + void formItineraryOperandCycleString(const Record *ItinData, std::string &ItinString, unsigned &NOperandCycles); - void FormItineraryBypassString(const std::string &Names, + void formItineraryBypassString(const std::string &Names, const Record *ItinData, std::string &ItinString, unsigned NOperandCycles); - void EmitStageAndOperandCycleData( + void emitStageAndOperandCycleData( raw_ostream &OS, std::vector> &ProcItinLists); - void EmitItineraries(raw_ostream &OS, + void emitItineraries(raw_ostream &OS, std::vector> &ProcItinLists); - unsigned EmitRegisterFileTables(const CodeGenProcModel &ProcModel, + unsigned emitRegisterFileTables(const CodeGenProcModel &ProcModel, raw_ostream &OS); - void EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel, + void emitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS); - void EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, + void emitExtraProcessorInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS); - void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name, + void emitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name, char Separator); - void EmitProcessorResourceSubUnits(const CodeGenProcModel &ProcModel, + void emitProcessorResourceSubUnits(const CodeGenProcModel &ProcModel, raw_ostream &OS); - void EmitProcessorResources(const CodeGenProcModel &ProcModel, + void emitProcessorResources(const CodeGenProcModel &ProcModel, raw_ostream &OS); - const Record *FindWriteResources(const CodeGenSchedRW &SchedWrite, + const Record *findWriteResources(const CodeGenSchedRW &SchedWrite, 
const CodeGenProcModel &ProcModel); - const Record *FindReadAdvance(const CodeGenSchedRW &SchedRead, + const Record *findReadAdvance(const CodeGenSchedRW &SchedRead, const CodeGenProcModel &ProcModel); - void ExpandProcResources(ConstRecVec &PRVec, + void expandProcResources(ConstRecVec &PRVec, std::vector &ReleaseAtCycles, std::vector &AcquireAtCycles, const CodeGenProcModel &ProcModel); - void GenSchedClassTables(const CodeGenProcModel &ProcModel, + void genSchedClassTables(const CodeGenProcModel &ProcModel, SchedClassTables &SchedTables); - void EmitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS); - void EmitProcessorModels(raw_ostream &OS); - void EmitSchedModelHelpers(const std::string &ClassName, raw_ostream &OS); + void emitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS); + void emitProcessorModels(raw_ostream &OS); + void emitSchedModelHelpers(const std::string &ClassName, raw_ostream &OS); void emitSchedModelHelpersImpl(raw_ostream &OS, bool OnlyExpandMCInstPredicates = false); void emitGenMCSubtargetInfo(raw_ostream &OS); - void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS); + void emitMcInstrAnalysisPredicateFunctions(raw_ostream &OS); - void EmitSchedModel(raw_ostream &OS); + void emitSchedModel(raw_ostream &OS); void emitGetMacroFusions(const std::string &ClassName, raw_ostream &OS); - void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS); - void ParseFeaturesFunction(raw_ostream &OS); + void emitHwModeCheck(const std::string &ClassName, raw_ostream &OS); + void parseFeaturesFunction(raw_ostream &OS); public: SubtargetEmitter(const RecordKeeper &R) : TGT(R), Records(R), SchedModels(TGT.getSchedModels()), Target(TGT.getName()) {} - void run(raw_ostream &o); + void run(raw_ostream &O); }; } // end anonymous namespace @@ -153,7 +153,7 @@ class SubtargetEmitter { // // Enumeration - Emit the specified class as an enumeration. 
// -FeatureMapTy SubtargetEmitter::Enumeration(raw_ostream &OS) { +FeatureMapTy SubtargetEmitter::enumeration(raw_ostream &OS) { ArrayRef DefList = Records.getAllDerivedDefinitions("SubtargetFeature"); @@ -171,15 +171,15 @@ FeatureMapTy SubtargetEmitter::Enumeration(raw_ostream &OS) { FeatureMapTy FeatureMap; // For each record - for (unsigned i = 0; i < N; ++i) { + for (unsigned I = 0; I < N; ++I) { // Next record - const Record *Def = DefList[i]; + const Record *Def = DefList[I]; // Get and emit name - OS << " " << Def->getName() << " = " << i << ",\n"; + OS << " " << Def->getName() << " = " << I << ",\n"; // Save the index for this feature. - FeatureMap[Def] = i; + FeatureMap[Def] = I; } OS << " " @@ -201,9 +201,9 @@ static void printFeatureMask(raw_ostream &OS, } OS << "{ { { "; - for (unsigned i = 0; i != Mask.size(); ++i) { + for (unsigned I = 0; I != Mask.size(); ++I) { OS << "0x"; - OS.write_hex(Mask[i]); + OS.write_hex(Mask[I]); OS << "ULL, "; } OS << "} } }"; @@ -211,7 +211,7 @@ static void printFeatureMask(raw_ostream &OS, /// Emit some information about the SubtargetFeature as calls to a macro so /// that they can be used from C++. -void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) { +void SubtargetEmitter::emitSubtargetInfoMacroCalls(raw_ostream &OS) { OS << "\n#ifdef GET_SUBTARGETINFO_MACRO\n"; std::vector FeatureList = @@ -252,7 +252,7 @@ void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) { // FeatureKeyValues - Emit data of all the subtarget features. Used by the // command line. // -unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS, +unsigned SubtargetEmitter::featureKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap) { std::vector FeatureList = Records.getAllDerivedDefinitions("SubtargetFeature"); @@ -301,7 +301,7 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS, // CPUKeyValues - Emit data of all the subtarget processors. Used by command // line. 
// -unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS, +unsigned SubtargetEmitter::cpuKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap) { // Gather and sort processor information std::vector ProcessorList = @@ -349,7 +349,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS, // data initialization for the specified itinerary. N is the number // of stages. // -void SubtargetEmitter::FormItineraryStageString(const std::string &Name, +void SubtargetEmitter::formItineraryStageString(const std::string &Name, const Record *ItinData, std::string &ItinString, unsigned &NStages) { @@ -358,9 +358,9 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name, // For each stage unsigned N = NStages = StageList.size(); - for (unsigned i = 0; i < N;) { + for (unsigned I = 0; I < N;) { // Next stage - const Record *Stage = StageList[i]; + const Record *Stage = StageList[I]; // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc, kind } int Cycles = Stage->getValueAsInt("Cycles"); @@ -370,10 +370,10 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name, ConstRecVec UnitList = Stage->getValueAsListOfDefs("Units"); // For each unit - for (unsigned j = 0, M = UnitList.size(); j < M;) { + for (unsigned J = 0, M = UnitList.size(); J < M;) { // Add name and bitwise or - ItinString += Name + "FU::" + UnitList[j]->getName().str(); - if (++j < M) + ItinString += Name + "FU::" + UnitList[J]->getName().str(); + if (++J < M) ItinString += " | "; } @@ -385,7 +385,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name, // Close off stage ItinString += " }"; - if (++i < N) + if (++I < N) ItinString += ", "; } } @@ -395,7 +395,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name, // operand cycle initialization for the specified itinerary. N is the // number of operands that has cycles specified. 
// -void SubtargetEmitter::FormItineraryOperandCycleString( +void SubtargetEmitter::formItineraryOperandCycleString( const Record *ItinData, std::string &ItinString, unsigned &NOperandCycles) { // Get operand cycle list std::vector OperandCycleList = @@ -411,19 +411,19 @@ void SubtargetEmitter::FormItineraryOperandCycleString( } } -void SubtargetEmitter::FormItineraryBypassString(const std::string &Name, +void SubtargetEmitter::formItineraryBypassString(const std::string &Name, const Record *ItinData, std::string &ItinString, unsigned NOperandCycles) { ConstRecVec BypassList = ItinData->getValueAsListOfDefs("Bypasses"); unsigned N = BypassList.size(); - unsigned i = 0; + unsigned I = 0; ListSeparator LS; - for (; i < N; ++i) { + for (; I < N; ++I) { ItinString += LS; - ItinString += Name + "Bypass::" + BypassList[i]->getName().str(); + ItinString += Name + "Bypass::" + BypassList[I]->getName().str(); } - for (; i < NOperandCycles; ++i) { + for (; I < NOperandCycles; ++I) { ItinString += LS; ItinString += " 0"; } @@ -434,7 +434,7 @@ void SubtargetEmitter::FormItineraryBypassString(const std::string &Name, // cycle tables. Create a list of InstrItinerary objects (ProcItinLists) indexed // by CodeGenSchedClass::Index. // -void SubtargetEmitter::EmitStageAndOperandCycleData( +void SubtargetEmitter::emitStageAndOperandCycleData( raw_ostream &OS, std::vector> &ProcItinLists) { // Multiple processor models may share an itinerary record. Emit it once. 
SmallPtrSet ItinsDefSet; @@ -453,9 +453,9 @@ void SubtargetEmitter::EmitStageAndOperandCycleData( OS << "\n// Functional units for \"" << Name << "\"\n" << "namespace " << Name << "FU {\n"; - for (unsigned j = 0, FUN = FUs.size(); j < FUN; ++j) - OS << " const InstrStage::FuncUnits " << FUs[j]->getName() - << " = 1ULL << " << j << ";\n"; + for (unsigned J = 0, FUN = FUs.size(); J < FUN; ++J) + OS << " const InstrStage::FuncUnits " << FUs[J]->getName() + << " = 1ULL << " << J << ";\n"; OS << "} // end namespace " << Name << "FU\n"; @@ -466,8 +466,8 @@ void SubtargetEmitter::EmitStageAndOperandCycleData( << "namespace " << Name << "Bypass {\n"; OS << " const unsigned NoBypass = 0;\n"; - for (unsigned j = 0, BPN = BPs.size(); j < BPN; ++j) - OS << " const unsigned " << BPs[j]->getName() << " = 1 << " << j + for (unsigned J = 0, BPN = BPs.size(); J < BPN; ++J) + OS << " const unsigned " << BPs[J]->getName() << " = 1 << " << J << ";\n"; OS << "} // end namespace " << Name << "Bypass\n"; @@ -518,7 +518,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData( std::string ItinStageString; unsigned NStages = 0; if (ItinData) - FormItineraryStageString(std::string(Name), ItinData, ItinStageString, + formItineraryStageString(std::string(Name), ItinData, ItinStageString, NStages); // Get string and operand cycle count @@ -526,10 +526,10 @@ void SubtargetEmitter::EmitStageAndOperandCycleData( unsigned NOperandCycles = 0; std::string ItinBypassString; if (ItinData) { - FormItineraryOperandCycleString(ItinData, ItinOperandCycleString, + formItineraryOperandCycleString(ItinData, ItinOperandCycleString, NOperandCycles); - FormItineraryBypassString(std::string(Name), ItinData, ItinBypassString, + formItineraryBypassString(std::string(Name), ItinData, ItinBypassString, NOperandCycles); } @@ -610,7 +610,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData( // Itineraries for each processor. The Itinerary lists are indexed on // CodeGenSchedClass::Index. 
// -void SubtargetEmitter::EmitItineraries( +void SubtargetEmitter::emitItineraries( raw_ostream &OS, std::vector> &ProcItinLists) { // Multiple processor models may share an itinerary record. Emit it once. SmallPtrSet ItinsDefSet; @@ -642,15 +642,15 @@ void SubtargetEmitter::EmitItineraries( OS << ItinsDef->getName() << "[] = {\n"; // For each itinerary class in CodeGenSchedClass::Index order. - for (unsigned j = 0, M = ItinList.size(); j < M; ++j) { - InstrItinerary &Intinerary = ItinList[j]; + for (unsigned J = 0, M = ItinList.size(); J < M; ++J) { + InstrItinerary &Intinerary = ItinList[J]; // Emit Itinerary in the form of // { firstStage, lastStage, firstCycle, lastCycle } // index OS << " { " << Intinerary.NumMicroOps << ", " << Intinerary.FirstStage << ", " << Intinerary.LastStage << ", " << Intinerary.FirstOperandCycle << ", " << Intinerary.LastOperandCycle << " }" - << ", // " << j << " " << SchedModels.getSchedClass(j).Name << "\n"; + << ", // " << J << " " << SchedModels.getSchedClass(J).Name << "\n"; } // End processor itinerary table OS << " { 0, uint16_t(~0U), uint16_t(~0U), uint16_t(~0U), uint16_t(~0U) }" @@ -662,7 +662,7 @@ void SubtargetEmitter::EmitItineraries( // Emit either the value defined in the TableGen Record, or the default // value defined in the C++ header. The Record is null if the processor does not // define a model. -void SubtargetEmitter::EmitProcessorProp(raw_ostream &OS, const Record *R, +void SubtargetEmitter::emitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name, char Separator) { OS << " "; int V = R ? 
R->getValueAsInt(Name) : -1; @@ -673,14 +673,14 @@ void SubtargetEmitter::EmitProcessorProp(raw_ostream &OS, const Record *R, OS << '\n'; } -void SubtargetEmitter::EmitProcessorResourceSubUnits( +void SubtargetEmitter::emitProcessorResourceSubUnits( const CodeGenProcModel &ProcModel, raw_ostream &OS) { OS << "\nstatic const unsigned " << ProcModel.ModelName << "ProcResourceSubUnits[] = {\n" << " 0, // Invalid\n"; - for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) { - const Record *PRDef = ProcModel.ProcResourceDefs[i]; + for (unsigned I = 0, E = ProcModel.ProcResourceDefs.size(); I < E; ++I) { + const Record *PRDef = ProcModel.ProcResourceDefs[I]; if (!PRDef->isSubClassOf("ProcResGroup")) continue; for (const Record *RUDef : PRDef->getValueAsListOfDefs("Resources")) { @@ -695,7 +695,7 @@ void SubtargetEmitter::EmitProcessorResourceSubUnits( OS << "};\n"; } -static void EmitRetireControlUnitInfo(const CodeGenProcModel &ProcModel, +static void emitRetireControlUnitInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS) { int64_t ReorderBufferSize = 0, MaxRetirePerCycle = 0; if (const Record *RCU = ProcModel.RetireControlUnit) { @@ -709,7 +709,7 @@ static void EmitRetireControlUnitInfo(const CodeGenProcModel &ProcModel, OS << MaxRetirePerCycle << ", // MaxRetirePerCycle\n "; } -static void EmitRegisterFileInfo(const CodeGenProcModel &ProcModel, +static void emitRegisterFileInfo(const CodeGenProcModel &ProcModel, unsigned NumRegisterFiles, unsigned NumCostEntries, raw_ostream &OS) { if (NumRegisterFiles) @@ -726,7 +726,7 @@ static void EmitRegisterFileInfo(const CodeGenProcModel &ProcModel, } unsigned -SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel, +SubtargetEmitter::emitRegisterFileTables(const CodeGenProcModel &ProcModel, raw_ostream &OS) { if (llvm::all_of(ProcModel.RegisterFiles, [](const CodeGenRegisterFile &RF) { return RF.hasDefaultCosts(); @@ -778,7 +778,7 @@ SubtargetEmitter::EmitRegisterFileTables(const 
CodeGenProcModel &ProcModel, return CostTblIndex; } -void SubtargetEmitter::EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel, +void SubtargetEmitter::emitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS) { unsigned QueueID = 0; if (ProcModel.LoadQueue) { @@ -798,33 +798,33 @@ void SubtargetEmitter::EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel, OS << " " << QueueID << ", // Resource Descriptor for the Store Queue\n"; } -void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel, +void SubtargetEmitter::emitExtraProcessorInfo(const CodeGenProcModel &ProcModel, raw_ostream &OS) { // Generate a table of register file descriptors (one entry per each user // defined register file), and a table of register costs. - unsigned NumCostEntries = EmitRegisterFileTables(ProcModel, OS); + unsigned NumCostEntries = emitRegisterFileTables(ProcModel, OS); // Now generate a table for the extra processor info. OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName << "ExtraInfo = {\n "; // Add information related to the retire control unit. - EmitRetireControlUnitInfo(ProcModel, OS); + emitRetireControlUnitInfo(ProcModel, OS); // Add information related to the register files (i.e. where to find register // file descriptors and register costs). - EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(), + emitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(), NumCostEntries, OS); // Add information about load/store queues. 
- EmitLoadStoreQueueInfo(ProcModel, OS); + emitLoadStoreQueueInfo(ProcModel, OS); OS << "};\n"; } -void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, +void SubtargetEmitter::emitProcessorResources(const CodeGenProcModel &ProcModel, raw_ostream &OS) { - EmitProcessorResourceSubUnits(ProcModel, OS); + emitProcessorResourceSubUnits(ProcModel, OS); OS << "\n// {Name, NumUnits, SuperIdx, BufferSize, SubUnitsIdxBegin}\n"; OS << "static const llvm::MCProcResourceDesc " << ProcModel.ModelName @@ -833,8 +833,8 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, << " {\"InvalidUnit\", 0, 0, 0, 0},\n"; unsigned SubUnitsOffset = 1; - for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) { - const Record *PRDef = ProcModel.ProcResourceDefs[i]; + for (unsigned I = 0, E = ProcModel.ProcResourceDefs.size(); I < E; ++I) { + const Record *PRDef = ProcModel.ProcResourceDefs[I]; const Record *SuperDef = nullptr; unsigned SuperIdx = 0; @@ -866,7 +866,7 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, } else { OS << "nullptr"; } - OS << "}, // #" << i + 1; + OS << "}, // #" << I + 1; if (SuperDef) OS << ", Super=" << SuperDef->getName(); OS << "\n"; @@ -877,7 +877,7 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, // Find the WriteRes Record that defines processor resources for this // SchedWrite. const Record * -SubtargetEmitter::FindWriteResources(const CodeGenSchedRW &SchedWrite, +SubtargetEmitter::findWriteResources(const CodeGenSchedRW &SchedWrite, const CodeGenProcModel &ProcModel) { // Check if the SchedWrite is already subtarget-specific and directly @@ -938,7 +938,7 @@ SubtargetEmitter::FindWriteResources(const CodeGenSchedRW &SchedWrite, /// Find the ReadAdvance record for the given SchedRead on this processor or /// return NULL. 
const Record * -SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead, +SubtargetEmitter::findReadAdvance(const CodeGenSchedRW &SchedRead, const CodeGenProcModel &ProcModel) { // Check for SchedReads that directly specify a ReadAdvance. if (SchedRead.TheDef->isSubClassOf("SchedReadAdvance")) @@ -997,12 +997,12 @@ SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead, // Expand an explicit list of processor resources into a full list of implied // resource groups and super resources that cover them. -void SubtargetEmitter::ExpandProcResources( +void SubtargetEmitter::expandProcResources( ConstRecVec &PRVec, std::vector &ReleaseAtCycles, std::vector &AcquireAtCycles, const CodeGenProcModel &PM) { assert(PRVec.size() == ReleaseAtCycles.size() && "failed precondition"); - for (unsigned i = 0, e = PRVec.size(); i != e; ++i) { - const Record *PRDef = PRVec[i]; + for (unsigned I = 0, E = PRVec.size(); I != E; ++I) { + const Record *PRDef = PRVec[I]; ConstRecVec SubResources; if (PRDef->isSubClassOf("ProcResGroup")) SubResources = PRDef->getValueAsListOfDefs("Resources"); @@ -1019,8 +1019,8 @@ void SubtargetEmitter::ExpandProcResources( const Record *SuperDef = SchedModels.findProcResUnits( SubDef->getValueAsDef("Super"), PM, SubDef->getLoc()); PRVec.push_back(SuperDef); - ReleaseAtCycles.push_back(ReleaseAtCycles[i]); - AcquireAtCycles.push_back(AcquireAtCycles[i]); + ReleaseAtCycles.push_back(ReleaseAtCycles[I]); + AcquireAtCycles.push_back(AcquireAtCycles[I]); SubDef = SuperDef; } } @@ -1036,8 +1036,8 @@ void SubtargetEmitter::ExpandProcResources( } if (SubI == SubE) { PRVec.push_back(PR); - ReleaseAtCycles.push_back(ReleaseAtCycles[i]); - AcquireAtCycles.push_back(AcquireAtCycles[i]); + ReleaseAtCycles.push_back(ReleaseAtCycles[I]); + AcquireAtCycles.push_back(AcquireAtCycles[I]); } } } @@ -1045,7 +1045,7 @@ void SubtargetEmitter::ExpandProcResources( // Generate the SchedClass table for this processor and update global // tables. 
Must be called for each processor in order. -void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, +void SubtargetEmitter::genSchedClassTables(const CodeGenProcModel &ProcModel, SchedClassTables &SchedTables) { std::vector &SCTab = SchedTables.ProcSchedClasses.emplace_back(); @@ -1147,7 +1147,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, for (unsigned WS : WriteSeq) { const Record *WriteRes = - FindWriteResources(SchedModels.getSchedWrite(WS), ProcModel); + findWriteResources(SchedModels.getSchedWrite(WS), ProcModel); // Mark the parent class as invalid for unsupported write types. if (WriteRes->getValueAsBit("Unsupported")) { @@ -1209,7 +1209,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, assert(AcquireAtCycles.size() == ReleaseAtCycles.size()); - ExpandProcResources(PRVec, ReleaseAtCycles, AcquireAtCycles, ProcModel); + expandProcResources(PRVec, ReleaseAtCycles, AcquireAtCycles, ProcModel); assert(AcquireAtCycles.size() == ReleaseAtCycles.size()); for (unsigned PRIdx = 0, PREnd = PRVec.size(); PRIdx != PREnd; @@ -1263,7 +1263,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, for (unsigned UseIdx = 0, EndIdx = Reads.size(); UseIdx != EndIdx; ++UseIdx) { const Record *ReadAdvance = - FindReadAdvance(SchedModels.getSchedRead(Reads[UseIdx]), ProcModel); + findReadAdvance(SchedModels.getSchedRead(Reads[UseIdx]), ProcModel); if (!ReadAdvance) continue; @@ -1323,12 +1323,12 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, SchedTables.WriteLatencies.begin(), SchedTables.WriteLatencies.end(), WriteLatencies.begin(), WriteLatencies.end()); if (WLPos != SchedTables.WriteLatencies.end()) { - unsigned idx = WLPos - SchedTables.WriteLatencies.begin(); - SCDesc.WriteLatencyIdx = idx; - for (unsigned i = 0, e = WriteLatencies.size(); i < e; ++i) - if (SchedTables.WriterNames[idx + i].find(WriterNames[i]) == + unsigned 
Idx = WLPos - SchedTables.WriteLatencies.begin(); + SCDesc.WriteLatencyIdx = Idx; + for (unsigned I = 0, E = WriteLatencies.size(); I < E; ++I) + if (SchedTables.WriterNames[Idx + I].find(WriterNames[I]) == std::string::npos) { - SchedTables.WriterNames[idx + i] += std::string("_") + WriterNames[i]; + SchedTables.WriterNames[Idx + I] += std::string("_") + WriterNames[I]; } } else { SCDesc.WriteLatencyIdx = SchedTables.WriteLatencies.size(); @@ -1351,7 +1351,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, } // Emit SchedClass tables for all processors and associated global tables. -void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables, +void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS) { // Emit global WriteProcResTable. OS << "\n// {ProcResourceIdx, ReleaseAtCycle, AcquireAtCycle}\n" @@ -1446,15 +1446,15 @@ void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables, } } -void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) { +void SubtargetEmitter::emitProcessorModels(raw_ostream &OS) { // For each processor model. for (const CodeGenProcModel &PM : SchedModels.procModels()) { // Emit extra processor info if available. if (PM.hasExtraProcessorInfo()) - EmitExtraProcessorInfo(PM, OS); + emitExtraProcessorInfo(PM, OS); // Emit processor resource table. 
if (PM.hasInstrSchedModel()) - EmitProcessorResources(PM, OS); + emitProcessorResources(PM, OS); else if (!PM.ProcResourceDefs.empty()) PrintFatalError(PM.ModelDef->getLoc(), "SchedMachineModel defines " @@ -1463,12 +1463,12 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) { // Begin processor itinerary properties OS << "\n"; OS << "static const llvm::MCSchedModel " << PM.ModelName << " = {\n"; - EmitProcessorProp(OS, PM.ModelDef, "IssueWidth", ','); - EmitProcessorProp(OS, PM.ModelDef, "MicroOpBufferSize", ','); - EmitProcessorProp(OS, PM.ModelDef, "LoopMicroOpBufferSize", ','); - EmitProcessorProp(OS, PM.ModelDef, "LoadLatency", ','); - EmitProcessorProp(OS, PM.ModelDef, "HighLatency", ','); - EmitProcessorProp(OS, PM.ModelDef, "MispredictPenalty", ','); + emitProcessorProp(OS, PM.ModelDef, "IssueWidth", ','); + emitProcessorProp(OS, PM.ModelDef, "MicroOpBufferSize", ','); + emitProcessorProp(OS, PM.ModelDef, "LoopMicroOpBufferSize", ','); + emitProcessorProp(OS, PM.ModelDef, "LoadLatency", ','); + emitProcessorProp(OS, PM.ModelDef, "HighLatency", ','); + emitProcessorProp(OS, PM.ModelDef, "MispredictPenalty", ','); bool PostRAScheduler = (PM.ModelDef ? PM.ModelDef->getValueAsBit("PostRAScheduler") : false); @@ -1516,7 +1516,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) { // // EmitSchedModel - Emits all scheduling model tables, folding common patterns. 
// -void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) { +void SubtargetEmitter::emitSchedModel(raw_ostream &OS) { OS << "#ifdef DBGFIELD\n" << "#error \"GenSubtargetInfo.inc requires a DBGFIELD macro\"\n" << "#endif\n" @@ -1529,22 +1529,22 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) { if (SchedModels.hasItineraries()) { std::vector> ProcItinLists; // Emit the stage data - EmitStageAndOperandCycleData(OS, ProcItinLists); - EmitItineraries(OS, ProcItinLists); + emitStageAndOperandCycleData(OS, ProcItinLists); + emitItineraries(OS, ProcItinLists); } OS << "\n// ===============================================================\n" << "// Data tables for the new per-operand machine model.\n"; SchedClassTables SchedTables; for (const CodeGenProcModel &ProcModel : SchedModels.procModels()) { - GenSchedClassTables(ProcModel, SchedTables); + genSchedClassTables(ProcModel, SchedTables); } - EmitSchedClassTables(SchedTables, OS); + emitSchedClassTables(SchedTables, OS); OS << "\n#undef DBGFIELD\n"; // Emit the processor machine model - EmitProcessorModels(OS); + emitProcessorModels(OS); } static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) { @@ -1756,7 +1756,7 @@ void SubtargetEmitter::emitSchedModelHelpersImpl( emitSchedModelHelperEpilogue(OS, OnlyExpandMCInstPredicates); } -void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName, +void SubtargetEmitter::emitSchedModelHelpers(const std::string &ClassName, raw_ostream &OS) { OS << "unsigned " << ClassName << "\n::resolveSchedClass(unsigned SchedClass, const MachineInstr *MI," @@ -1786,7 +1786,7 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName, PE.expandSTIPredicate(OS, Fn); } -void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName, +void SubtargetEmitter::emitHwModeCheck(const std::string &ClassName, raw_ostream &OS) { const CodeGenHwModes &CGH = TGT.getHwModes(); assert(CGH.getNumModeIds() > 0); @@ -1825,7 +1825,7 @@ void 
SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName, OS << " return Modes;\n}\n"; // End emitting for getHwModeSet(). - auto handlePerMode = [&](std::string ModeType, unsigned ModeInBitSet) { + auto HandlePerMode = [&](std::string ModeType, unsigned ModeInBitSet) { OS << " case HwMode_" << ModeType << ":\n" << " Modes &= " << ModeInBitSet << ";\n" << " if (!Modes)\n return Modes;\n" @@ -1842,9 +1842,9 @@ void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName, OS << " if (!Modes)\n return Modes;\n\n"; OS << " switch (type) {\n"; OS << " case HwMode_Default:\n return llvm::countr_zero(Modes) + 1;\n"; - handlePerMode("ValueType", ValueTypeModes); - handlePerMode("RegInfo", RegInfoModes); - handlePerMode("EncodingInfo", EncodingInfoModes); + HandlePerMode("ValueType", ValueTypeModes); + HandlePerMode("RegInfo", RegInfoModes); + HandlePerMode("EncodingInfo", EncodingInfoModes); OS << " }\n"; OS << " llvm_unreachable(\"unexpected HwModeType\");\n" << " return 0; // should not get here\n}\n"; @@ -1871,7 +1871,7 @@ void SubtargetEmitter::emitGetMacroFusions(const std::string &ClassName, // Produces a subtarget specific function for parsing // the subtarget features string. 
-void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) { +void SubtargetEmitter::parseFeaturesFunction(raw_ostream &OS) { ArrayRef Features = Records.getAllDerivedDefinitions("SubtargetFeature"); @@ -1951,10 +1951,10 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { << " return MCSubtargetInfo::isCPUStringValid(CPU);\n" << " }\n"; OS << "};\n"; - EmitHwModeCheck(Target + "GenMCSubtargetInfo", OS); + emitHwModeCheck(Target + "GenMCSubtargetInfo", OS); } -void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) { +void SubtargetEmitter::emitMcInstrAnalysisPredicateFunctions(raw_ostream &OS) { OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n"; OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n"; @@ -1988,18 +1988,18 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "#undef GET_SUBTARGETINFO_ENUM\n\n"; OS << "namespace llvm {\n"; - auto FeatureMap = Enumeration(OS); + auto FeatureMap = enumeration(OS); OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n"; - EmitSubtargetInfoMacroCalls(OS); + emitSubtargetInfoMacroCalls(OS); OS << "namespace llvm {\n"; - unsigned NumFeatures = FeatureKeyValues(OS, FeatureMap); + unsigned NumFeatures = featureKeyValues(OS, FeatureMap); OS << "\n"; - EmitSchedModel(OS); + emitSchedModel(OS); OS << "\n"; - unsigned NumProcs = CPUKeyValues(OS, FeatureMap); + unsigned NumProcs = cpuKeyValues(OS, FeatureMap); OS << "\n"; // MCInstrInfo initialization routine. 
@@ -2045,7 +2045,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "#include \"llvm/Support/raw_ostream.h\"\n\n"; if (Target == "AArch64") OS << "#include \"llvm/TargetParser/AArch64TargetParser.h\"\n\n"; - ParseFeaturesFunction(OS); + parseFeaturesFunction(OS); OS << "#endif // GET_SUBTARGETINFO_TARGET_DESC\n\n"; @@ -2140,15 +2140,15 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "nullptr, nullptr, nullptr"; OS << ") {}\n\n"; - EmitSchedModelHelpers(ClassName, OS); - EmitHwModeCheck(ClassName, OS); + emitSchedModelHelpers(ClassName, OS); + emitHwModeCheck(ClassName, OS); emitGetMacroFusions(ClassName, OS); OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n"; - EmitMCInstrAnalysisPredicateFunctions(OS); + emitMcInstrAnalysisPredicateFunctions(OS); } static TableGen::Emitter::OptClass diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp index fff4c6b7c27ada1..bea2a2e735dbe27 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -39,17 +39,17 @@ static cl::opt Class("class", cl::value_desc("class name"), cl::cat(PrintEnumsCat)); -static void PrintRecords(const RecordKeeper &Records, raw_ostream &OS) { +static void printRecords(const RecordKeeper &Records, raw_ostream &OS) { OS << Records; // No argument, dump all contents } -static void PrintEnums(const RecordKeeper &Records, raw_ostream &OS) { +static void printEnums(const RecordKeeper &Records, raw_ostream &OS) { for (const Record *Rec : Records.getAllDerivedDefinitions(Class)) OS << Rec->getName() << ", "; OS << "\n"; } -static void PrintSets(const RecordKeeper &Records, raw_ostream &OS) { +static void printSets(const RecordKeeper &Records, raw_ostream &OS) { SetTheory Sets; Sets.addFieldExpander("Set", "Elements"); for (const Record *Rec : Records.getAllDerivedDefinitions("Set")) { @@ -63,15 +63,15 @@ static void PrintSets(const RecordKeeper &Records, raw_ostream &OS) { } static TableGen::Emitter::Opt X[] = 
{ - {"print-records", PrintRecords, "Print all records to stdout (default)", + {"print-records", printRecords, "Print all records to stdout (default)", true}, {"print-detailed-records", EmitDetailedRecords, "Print full details of all records to stdout"}, {"null-backend", [](const RecordKeeper &Records, raw_ostream &OS) {}, "Do nothing after parsing (useful for timing)"}, {"dump-json", EmitJSON, "Dump all records as machine-readable JSON"}, - {"print-enums", PrintEnums, "Print enum values for a class"}, - {"print-sets", PrintSets, "Print expanded sets for testing DAG exprs"}, + {"print-enums", printEnums, "Print enum values for a class"}, + {"print-sets", printSets, "Print expanded sets for testing DAG exprs"}, }; int main(int argc, char **argv) { diff --git a/llvm/utils/TableGen/VTEmitter.cpp b/llvm/utils/TableGen/VTEmitter.cpp index 4cbc7abd699d204..d02932dd5e7fca5 100644 --- a/llvm/utils/TableGen/VTEmitter.cpp +++ b/llvm/utils/TableGen/VTEmitter.cpp @@ -28,7 +28,7 @@ class VTEmitter { } // End anonymous namespace. -static void VTtoGetLLVMTyString(raw_ostream &OS, const Record *VT) { +static void vTtoGetLlvmTyString(raw_ostream &OS, const Record *VT) { bool IsVector = VT->getValueAsBit("isVector"); bool IsRISCVVecTuple = VT->getValueAsBit("isRISCVVecTuple"); @@ -207,7 +207,7 @@ void VTEmitter::run(raw_ostream &OS) { continue; OS << " GET_VT_EVT(" << VT->getValueAsString("LLVMName") << ", "; - VTtoGetLLVMTyString(OS, VT); + vTtoGetLlvmTyString(OS, VT); OS << ")\n"; } OS << "#endif\n\n"; From f22c9ddb36dca84547212e087de3319dcc6bea49 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 29 Oct 2024 08:17:43 -0700 Subject: [PATCH 336/425] [ORC] Single-symbol convenience method does not need to be virtual. This convenience method just calls the general case which is already virtual. 
--- llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h b/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h index 4004c42d9146843..f2ea1f5b64c5332 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h @@ -32,8 +32,8 @@ class RedirectionManager { /// Change the redirection destination of given symbol to new destination /// symbol. - virtual Error redirect(JITDylib &JD, SymbolStringPtr Symbol, - ExecutorSymbolDef NewDest) { + Error redirect(JITDylib &JD, SymbolStringPtr Symbol, + ExecutorSymbolDef NewDest) { return redirect(JD, {{Symbol, NewDest}}); } From 9e37cbb469c0ec2fdbf4e3e7b0d9a2938ac30b01 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 29 Oct 2024 08:36:56 -0700 Subject: [PATCH 337/425] [ORC] Add some missing FIXMEs, move a temporary Error into an if condition. --- .../Orc/JITLinkRedirectableSymbolManager.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp b/llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp index 4ef217e6c562db2..81294cad4d7d426 100644 --- a/llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp +++ b/llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp @@ -52,17 +52,18 @@ void JITLinkRedirectableSymbolManager::emitRedirectableSymbols( return; } + // FIXME: return stubs to the pool here too. if (auto Err = R->replace(absoluteSymbols(NewSymbolDefs))) { ES.reportError(std::move(Err)); R->failMaterialization(); return; } - auto Err = R->withResourceKeyDo([&](ResourceKey Key) { - TrackedResources[Key].insert(TrackedResources[Key].end(), Symbols.begin(), - Symbols.end()); - }); - if (Err) { + // FIXME: return stubs to the pool here too. 
+ if (auto Err = R->withResourceKeyDo([&](ResourceKey Key) { + TrackedResources[Key].insert(TrackedResources[Key].end(), + Symbols.begin(), Symbols.end()); + })) { ES.reportError(std::move(Err)); R->failMaterialization(); return; From 8e14c6c172b122203f46a9ad114d51c74535cbb7 Mon Sep 17 00:00:00 2001 From: Kelvin Li Date: Tue, 29 Oct 2024 14:20:11 -0400 Subject: [PATCH 338/425] [flang] Support -mabi=vec-extabi and -mabi=vec-default on AIX (#113215) This option is to enable the AIX extended and default vector ABIs. --- clang/include/clang/Driver/Options.td | 4 ++- clang/lib/Driver/ToolChains/Flang.cpp | 31 ++++++++++++++++++++ clang/lib/Driver/ToolChains/Flang.h | 7 +++++ flang/include/flang/Frontend/TargetOptions.h | 3 ++ flang/lib/Frontend/CompilerInstance.cpp | 7 +++-- flang/lib/Frontend/CompilerInvocation.cpp | 10 +++++++ flang/test/Driver/mabi.f90 | 17 +++++++++++ 7 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 flang/test/Driver/mabi.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1ddf488b8bf4c7a..9d595984b63c4bf 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4678,7 +4678,8 @@ def malign_loops_EQ : Joined<["-"], "malign-loops=">, Group; def malign_branch_boundary_EQ : Joined<["-"], "malign-branch-boundary=">, Group, @@ -7363,6 +7364,7 @@ def mabi_EQ_ieeelongdouble : Flag<["-"], "mabi=ieeelongdouble">, HelpText<"Use IEEE 754 quadruple-precision for long double">, MarshallingInfoFlag>; def mabi_EQ_vec_extabi : Flag<["-"], "mabi=vec-extabi">, + Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Enable the extended Altivec ABI on AIX. 
Use volatile and nonvolatile vector registers">, MarshallingInfoFlag>; def mfloat_abi : Separate<["-"], "mfloat-abi">, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index a9d2b7a4dc48f9f..f9d2fdffe3b2fc0 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -203,6 +203,32 @@ void Flang::AddAArch64TargetArgs(const ArgList &Args, } } +void Flang::AddPPCTargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); + bool VecExtabi = false; + + if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { + StringRef V = A->getValue(); + if (V == "vec-extabi") + VecExtabi = true; + else if (V == "vec-default") + VecExtabi = false; + else + D.Diag(diag::err_drv_unsupported_option_argument) + << A->getSpelling() << V; + } + + const llvm::Triple &T = getToolChain().getTriple(); + if (VecExtabi) { + if (!T.isOSAIX()) { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << "-mabi=vec-extabi" << T.str(); + } + CmdArgs.push_back("-mabi=vec-extabi"); + } +} + void Flang::AddRISCVTargetArgs(const ArgList &Args, ArgStringList &CmdArgs) const { const llvm::Triple &Triple = getToolChain().getTriple(); @@ -383,6 +409,11 @@ void Flang::addTargetOptions(const ArgList &Args, getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); AddX86_64TargetArgs(Args, CmdArgs); break; + case llvm::Triple::ppc: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + AddPPCTargetArgs(Args, CmdArgs); + break; } if (Arg *A = Args.getLastArg(options::OPT_fveclib)) { diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h index 9f5e26b8608324d..4d7d0b8cd9ea55c 100644 --- a/clang/lib/Driver/ToolChains/Flang.h +++ b/clang/lib/Driver/ToolChains/Flang.h @@ -84,6 +84,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool { void AddX86_64TargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + /// 
Add specific options for PPC target. + /// + /// \param [in] Args The list of input driver arguments + /// \param [out] CmdArgs The list of output command arguments + void AddPPCTargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + /// Extract offload options from the driver arguments and add them to /// the command arguments. /// \param [in] C The current compilation for the driver invocation diff --git a/flang/include/flang/Frontend/TargetOptions.h b/flang/include/flang/Frontend/TargetOptions.h index 332adcbe6b6ac35..01c878067b921dc 100644 --- a/flang/include/flang/Frontend/TargetOptions.h +++ b/flang/include/flang/Frontend/TargetOptions.h @@ -44,6 +44,9 @@ class TargetOptions { /// The integer KINDs disabled for this target std::vector disabledIntegerKinds; + + /// Extended Altivec ABI on AIX + bool EnableAIXExtendedAltivecABI; }; } // end namespace Fortran::frontend diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp index d37430e0e5773e2..35c2ae3c73e69e8 100644 --- a/flang/lib/Frontend/CompilerInstance.cpp +++ b/flang/lib/Frontend/CompilerInstance.cpp @@ -313,7 +313,6 @@ bool CompilerInstance::setUpTargetMachine() { << error; return false; } - // Create `TargetMachine` const auto &CGOpts = getInvocation().getCodeGenOpts(); std::optional OptLevelOrNone = @@ -322,9 +321,13 @@ bool CompilerInstance::setUpTargetMachine() { llvm::CodeGenOptLevel OptLevel = *OptLevelOrNone; std::string featuresStr = getTargetFeatures(); std::optional cm = getCodeModel(CGOpts.CodeModel); + + llvm::TargetOptions tOpts = llvm::TargetOptions(); + tOpts.EnableAIXExtendedAltivecABI = targetOpts.EnableAIXExtendedAltivecABI; + targetMachine.reset(theTarget->createTargetMachine( theTriple, /*CPU=*/targetOpts.cpu, - /*Features=*/featuresStr, llvm::TargetOptions(), + /*Features=*/featuresStr, /*Options=*/tOpts, /*Reloc::Model=*/CGOpts.getRelocationModel(), /*CodeModel::Model=*/cm, OptLevel)); assert(targetMachine && 
"Failed to create TargetMachine"); diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 94d3d1154178775..1214a2ea6bf1f3f 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -457,6 +457,16 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) { if (args.hasArg(clang::driver::options::OPT_fdisable_integer_16)) opts.disabledIntegerKinds.push_back(16); + + if (const llvm::opt::Arg *a = + args.getLastArg(clang::driver::options::OPT_mabi_EQ)) { + llvm::StringRef V = a->getValue(); + if (V == "vec-extabi") { + opts.EnableAIXExtendedAltivecABI = true; + } else if (V == "vec-default") { + opts.EnableAIXExtendedAltivecABI = false; + } + } } // Tweak the frontend configuration based on the frontend action static void setUpFrontendBasedOnAction(FrontendOptions &opts) { diff --git a/flang/test/Driver/mabi.f90 b/flang/test/Driver/mabi.f90 new file mode 100644 index 000000000000000..88fd4d2a993fb04 --- /dev/null +++ b/flang/test/Driver/mabi.f90 @@ -0,0 +1,17 @@ +! RUN: not %flang -### -c --target=powerpc64le-unknown-linux -mabi=vec-extabi %s 2>&1 | FileCheck --check-prefix=INVALID1 %s +! RUN: not %flang -### -c --target=x86_64-unknown-linux -mabi=vec-extabi %s 2>&1 | FileCheck --check-prefix=INVALID2 %s +! RUN: not %flang -### -c --target=powerpc-unknown-aix -mabi=abc %s 2>&1 | FileCheck --check-prefix=INVALID3 %s +! RUN: %flang -### -c -target powerpc-unknown-aix %s 2>&1 | FileCheck --implicit-check-not=vec-extabi %s +! RUN: %flang -### -c -target powerpc-unknown-aix -mabi=vec-default %s 2>&1 | FileCheck --implicit-check-not=vec-extabi %s +! RUN: %flang -### -c -target powerpc-unknown-aix -mabi=vec-extabi %s 2>&1 | FileCheck --check-prefix=EXTABI %s + +! REQUIRES: target=powerpc{{.*}} + +! INVALID1: error: unsupported option '-mabi=vec-extabi' for target '{{.*}}' +! INVALID2: error: unsupported option '-mabi=' for target '{{.*}}' +! 
INVALID3: error: unsupported argument 'abc' to option '-mabi=' + +! EXTABI: "-fc1" +! EXTABI-SAME: "-mabi=vec-extabi" + + From efc6d33be9f4b4d0f0e8d3d5f198f2616b75792b Mon Sep 17 00:00:00 2001 From: Wanyi Date: Tue, 29 Oct 2024 14:22:51 -0400 Subject: [PATCH 339/425] [lldb] Fix write only file action to truncate the file (#112657) When `FileAction` opens file with write access, it doesn't clear the file nor append to the end of the file if it already exists. Instead, it writes from cursor index 0. For example, by using the settings `target.output-path` and `target.error-path`, lldb will redirect process stdout/stderr to files. It then calls this function to write to the files which the above symptoms appear. ## Test - Added unit test checking the file flags - Added 2 api tests checking - File content overwritten if the file path already exists - Stdout and stderr redirection to the same file doesn't change its behavior --- lldb/source/Host/common/FileAction.cpp | 2 +- .../API/commands/settings/TestSettings.py | 53 +++++++++++++++++++ .../python_api/process/io/TestProcessIO.py | 30 +++++++++++ lldb/unittests/Host/FileActionTest.cpp | 25 +++++++++ llvm/docs/ReleaseNotes.md | 2 + 5 files changed, 111 insertions(+), 1 deletion(-) diff --git a/lldb/source/Host/common/FileAction.cpp b/lldb/source/Host/common/FileAction.cpp index f980d3224640e07..e1c3e14a165ea97 100644 --- a/lldb/source/Host/common/FileAction.cpp +++ b/lldb/source/Host/common/FileAction.cpp @@ -41,7 +41,7 @@ bool FileAction::Open(int fd, const FileSpec &file_spec, bool read, else if (read) m_arg = O_NOCTTY | O_RDONLY; else - m_arg = O_NOCTTY | O_CREAT | O_WRONLY; + m_arg = O_NOCTTY | O_CREAT | O_WRONLY | O_TRUNC; m_file_spec = file_spec; return true; } else { diff --git a/lldb/test/API/commands/settings/TestSettings.py b/lldb/test/API/commands/settings/TestSettings.py index 385acceb7a8b5c1..2dd813f6b155b36 100644 --- a/lldb/test/API/commands/settings/TestSettings.py +++ 
b/lldb/test/API/commands/settings/TestSettings.py @@ -528,6 +528,59 @@ def test_set_error_output_path(self): output, exe=False, startstr="This message should go to standard out." ) + @skipIfDarwinEmbedded # debugserver on ios etc can't write files + def test_same_error_output_path(self): + """Test that setting target.error and output-path to the same file path for the launched process works.""" + self.build() + + exe = self.getBuildArtifact("a.out") + self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET) + + # Set the error-path and output-path and verify both are set. + self.runCmd( + "settings set target.error-path '{0}'".format( + lldbutil.append_to_process_working_directory(self, "output.txt") + ) + ) + self.runCmd( + "settings set target.output-path '{0}".format( + lldbutil.append_to_process_working_directory(self, "output.txt") + ) + ) + # And add hooks to restore the original settings during tearDown(). + self.addTearDownHook(lambda: self.runCmd("settings clear target.output-path")) + self.addTearDownHook(lambda: self.runCmd("settings clear target.error-path")) + + self.expect( + "settings show target.error-path", + SETTING_MSG("target.error-path"), + substrs=["target.error-path (file)", 'output.txt"'], + ) + + self.expect( + "settings show target.output-path", + SETTING_MSG("target.output-path"), + substrs=["target.output-path (file)", 'output.txt"'], + ) + + self.runCmd( + "process launch --working-dir '{0}'".format( + self.get_process_working_directory() + ), + RUN_SUCCEEDED, + ) + + output = lldbutil.read_file_from_process_wd(self, "output.txt") + err_message = "This message should go to standard error." + out_message = "This message should go to standard out." 
+ # Error msg should get flushed by the output msg + self.expect(output, exe=False, substrs=[out_message]) + self.assertNotIn( + err_message, + output, + "Race condition when both stderr/stdout redirects to the same file", + ) + def test_print_dictionary_setting(self): self.runCmd("settings clear target.env-vars") self.runCmd('settings set target.env-vars ["MY_VAR"]=some-value') diff --git a/lldb/test/API/python_api/process/io/TestProcessIO.py b/lldb/test/API/python_api/process/io/TestProcessIO.py index 5bb91d2758312df..3b5c7c48c51f4df 100644 --- a/lldb/test/API/python_api/process/io/TestProcessIO.py +++ b/lldb/test/API/python_api/process/io/TestProcessIO.py @@ -95,6 +95,36 @@ def test_stdout_stderr_redirection(self): error = self.read_error_file_and_delete() self.check_process_output(output, error) + @skipIfWindows # stdio manipulation unsupported on Windows + @expectedFlakeyLinux(bugnumber="llvm.org/pr26437") + @skipIfDarwinEmbedded # debugserver can't create/write files on the device + def test_stdout_stderr_redirection_to_existing_files(self): + """Exercise SBLaunchInfo::AddOpenFileAction() for STDOUT and STDERR without redirecting STDIN to output files already exist.""" + self.setup_test() + self.build() + self.create_target() + self.write_file_with_placeholder(self.output_file) + self.write_file_with_placeholder(self.error_file) + self.redirect_stdout() + self.redirect_stderr() + self.run_process(True) + output = self.read_output_file_and_delete() + error = self.read_error_file_and_delete() + self.check_process_output(output, error) + + def write_file_with_placeholder(self, target_file): + placeholder = "This content should be overwritten." 
+ if lldb.remote_platform: + self.runCmd( + 'platform file write "{target}" -d "{data}"'.format( + target=target_file, data=placeholder + ) + ) + else: + f = open(target_file, "w") + f.write(placeholder) + f.close() + # target_file - path on local file system or remote file system if running remote # local_file - path on local system def read_file_and_delete(self, target_file, local_file): diff --git a/lldb/unittests/Host/FileActionTest.cpp b/lldb/unittests/Host/FileActionTest.cpp index b208169aac20e60..3d2c722552c9c2f 100644 --- a/lldb/unittests/Host/FileActionTest.cpp +++ b/lldb/unittests/Host/FileActionTest.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "lldb/Host/FileAction.h" #include "gtest/gtest.h" @@ -17,3 +19,26 @@ TEST(FileActionTest, Open) { EXPECT_EQ(Action.GetAction(), FileAction::eFileActionOpen); EXPECT_EQ(Action.GetFileSpec(), FileSpec("/tmp")); } + +TEST(FileActionTest, OpenReadWrite) { + FileAction Action; + Action.Open(48, FileSpec("/tmp_0"), /*read*/ true, /*write*/ true); + EXPECT_TRUE(Action.GetActionArgument() & (O_NOCTTY | O_CREAT | O_RDWR)); + EXPECT_FALSE(Action.GetActionArgument() & O_RDONLY); + EXPECT_FALSE(Action.GetActionArgument() & O_WRONLY); +} + +TEST(FileActionTest, OpenReadOnly) { + FileAction Action; + Action.Open(49, FileSpec("/tmp_1"), /*read*/ true, /*write*/ false); + EXPECT_TRUE(Action.GetActionArgument() & (O_NOCTTY | O_RDONLY)); + EXPECT_FALSE(Action.GetActionArgument() & O_WRONLY); +} + +TEST(FileActionTest, OpenWriteOnly) { + FileAction Action; + Action.Open(50, FileSpec("/tmp_2"), /*read*/ false, /*write*/ true); + EXPECT_TRUE(Action.GetActionArgument() & + (O_NOCTTY | O_CREAT | O_WRONLY | O_TRUNC)); + EXPECT_FALSE(Action.GetActionArgument() & O_RDONLY); +} diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 92a45d845f1db8e..d5c650e74eeb28e 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -301,6 
+301,8 @@ Changes to LLDB * LLDB can now read the `fpmr` register from AArch64 Linux processes and core files. +* Program stdout/stderr redirection will now open the file with O_TRUNC flag, make sure to truncate the file if path already exists. + * eg. `settings set target.output-path/target.error-path ` Changes to BOLT --------------------------------- From b4e1af0096fd05ed4bddf11b48b604d75a7103d0 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Tue, 29 Oct 2024 18:40:06 +0000 Subject: [PATCH 340/425] [lldb-dap] Always pass disableASLR to the DAP executable (#113891) More context can be found in https://github.com/llvm/llvm-project/pull/110303 For DAP tests running in constrained environments (e.g., Docker containers), disabling ASLR isn't allowed. So we set `disableASLR=False` (since https://github.com/llvm/llvm-project/pull/113593). However, the `dap_server.py` will currently only forward the value of `disableASLR` to the DAP executable if it's set to `True`. If the DAP executable wasn't provided a `disableASLR` field it defaults to `true` too: https://github.com/llvm/llvm-project/blob/f14743794587db102c6d1b20f9c87a1ac20decfd/lldb/tools/lldb-dap/lldb-dap.cpp#L2103-L2104 This means that passing `disableASLR=False` from the tests is currently not possible. This is also true for many of the other boolean arguments of `request_launch`. But this patch only addresses `disableASLR` for now since it's blocking a libc++ patch. 
--- .../Python/lldbsuite/test/tools/lldb-dap/dap_server.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 63748a71f1122d8..c29992ce9c7848e 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -793,8 +793,6 @@ def request_launch( args_dict["env"] = env if stopOnEntry: args_dict["stopOnEntry"] = stopOnEntry - if disableASLR: - args_dict["disableASLR"] = disableASLR if disableSTDIO: args_dict["disableSTDIO"] = disableSTDIO if shellExpandArguments: @@ -829,6 +827,7 @@ def request_launch( if customThreadFormat: args_dict["customThreadFormat"] = customThreadFormat + args_dict["disableASLR"] = disableASLR args_dict["enableAutoVariableSummaries"] = enableAutoVariableSummaries args_dict["enableSyntheticChildDebugging"] = enableSyntheticChildDebugging args_dict["displayExtendedBacktrace"] = displayExtendedBacktrace From b9978f8c7792a8bfdbef8912b3db7617bc5fddff Mon Sep 17 00:00:00 2001 From: Renaud Kauffmann Date: Tue, 29 Oct 2024 11:48:48 -0700 Subject: [PATCH 341/425] [flang][cuda] Adding variable registration in constructor (#113976) 1) Adding variable registration in constructor 2) Applying feedback from PR https://github.com/llvm/llvm-project/pull/112989 --- .../Transforms/CUFAddConstructor.cpp | 72 +++++++++++++++++-- .../Optimizer/Transforms/CUFOpConversion.cpp | 2 +- flang/test/Fir/CUDA/cuda-constructor-2.f90 | 22 ++++++ 3 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 flang/test/Fir/CUDA/cuda-constructor-2.f90 diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp index 4da06be8ef7dd95..7cdb2f7ffe27d97 100644 --- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp +++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp @@ 
-6,15 +6,23 @@ // //===----------------------------------------------------------------------===// +#include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Runtime/RTBuilder.h" +#include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/CodeGen/Target.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" +#include "flang/Optimizer/Support/DataLayout.h" #include "flang/Optimizer/Transforms/CUFCommon.h" +#include "flang/Runtime/CUDA/registration.h" #include "flang/Runtime/entry-names.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Value.h" #include "mlir/Pass/Pass.h" #include "llvm/ADT/SmallVector.h" @@ -23,6 +31,8 @@ namespace fir { #include "flang/Optimizer/Transforms/Passes.h.inc" } // namespace fir +using namespace Fortran::runtime::cuda; + namespace { static constexpr llvm::StringRef cudaFortranCtorName{ @@ -34,13 +44,23 @@ struct CUFAddConstructor void runOnOperation() override { mlir::ModuleOp mod = getOperation(); mlir::SymbolTable symTab(mod); - mlir::OpBuilder builder{mod.getBodyRegion()}; + mlir::OpBuilder opBuilder{mod.getBodyRegion()}; + fir::FirOpBuilder builder(opBuilder, mod); + fir::KindMapping kindMap{fir::getKindMapping(mod)}; builder.setInsertionPointToEnd(mod.getBody()); mlir::Location loc = mod.getLoc(); auto *ctx = mod.getContext(); auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx); + auto idxTy = builder.getIndexType(); auto funcTy = mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false); + std::optional dl = + fir::support::getOrSetDataLayout(mod, /*allowDefaultLayout=*/false); + if (!dl) { + mlir::emitError(mod.getLoc(), + "data layout attribute is required to perform " + + getName() + "pass"); + } // 
Symbol reference to CUFRegisterAllocator. builder.setInsertionPointToEnd(mod.getBody()); @@ -58,12 +78,13 @@ struct CUFAddConstructor builder.setInsertionPointToStart(func.addEntryBlock(builder)); builder.create(loc, funcTy, cufRegisterAllocatorRef); - // Register kernels auto gpuMod = symTab.lookup(cudaDeviceModuleName); if (gpuMod) { auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(ctx); auto registeredMod = builder.create( loc, llvmPtrTy, mlir::SymbolRefAttr::get(ctx, gpuMod.getName())); + + // Register kernels for (auto func : gpuMod.getOps()) { if (func.isKernel()) { auto kernelName = mlir::SymbolRefAttr::get( @@ -72,12 +93,55 @@ struct CUFAddConstructor builder.create(loc, kernelName, registeredMod); } } + + // Register variables + for (fir::GlobalOp globalOp : mod.getOps()) { + auto attr = globalOp.getDataAttrAttr(); + if (!attr) + continue; + + mlir::func::FuncOp func; + switch (attr.getValue()) { + case cuf::DataAttribute::Device: + case cuf::DataAttribute::Constant: { + func = fir::runtime::getRuntimeFunc( + loc, builder); + auto fTy = func.getFunctionType(); + + // Global variable name + std::string gblNameStr = globalOp.getSymbol().getValue().str(); + gblNameStr += '\0'; + mlir::Value gblName = fir::getBase( + fir::factory::createStringLiteral(builder, loc, gblNameStr)); + + // Global variable size + auto sizeAndAlign = fir::getTypeSizeAndAlignmentOrCrash( + loc, globalOp.getType(), *dl, kindMap); + auto size = + builder.createIntegerConstant(loc, idxTy, sizeAndAlign.first); + + // Global variable address + mlir::Value addr = builder.create( + loc, globalOp.resultType(), globalOp.getSymbol()); + + llvm::SmallVector args{fir::runtime::createArguments( + builder, loc, fTy, registeredMod, addr, gblName, size)}; + builder.create(loc, func, args); + } break; + case cuf::DataAttribute::Managed: + TODO(loc, "registration of managed variables"); + default: + break; + } + if (!func) + continue; + } } builder.create(loc, mlir::ValueRange{}); // Create the 
llvm.global_ctor with the function. - // TODO: We might want to have a utility that retrieve it if already created - // and adds new functions. + // TODO: We might want to have a utility that retrieve it if already + // created and adds new functions. builder.setInsertionPointToEnd(mod.getBody()); llvm::SmallVector funcs; funcs.push_back( diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 9c2b882c7f46fef..14cc1cb508cfc01 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -111,7 +111,7 @@ mlir::Value getDeviceAddress(mlir::PatternRewriter &rewriter, switch (attr.getValue()) { case cuf::DataAttribute::Device: case cuf::DataAttribute::Managed: - case cuf::DataAttribute::Pinned: + case cuf::DataAttribute::Constant: isDevGlobal = true; break; default: diff --git a/flang/test/Fir/CUDA/cuda-constructor-2.f90 b/flang/test/Fir/CUDA/cuda-constructor-2.f90 new file mode 100644 index 000000000000000..378dabbb7c7e7d9 --- /dev/null +++ b/flang/test/Fir/CUDA/cuda-constructor-2.f90 @@ -0,0 +1,22 @@ +// RUN: fir-opt --split-input-file --cuf-add-constructor %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { + + fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda} : !fir.array<5xi32> + + gpu.module @cuda_device_mod [#nvvm.target] { + } +} + +// CHECK: gpu.module @cuda_device_mod [#nvvm.target] + +// CHECK: llvm.func internal @__cudaFortranConstructor() { +// CHECK-DAG: %[[MODULE:.*]] = cuf.register_module @cuda_device_mod -> !llvm.ptr +// CHECK-DAG: %[[VAR_NAME:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref> +// CHECK-DAG: %[[VAR_ADDR:.*]] = fir.address_of(@_QMmtestsEn) : !fir.ref> +// CHECK-DAG: %[[MODULE2:.*]] = fir.convert %[[MODULE]] : (!llvm.ptr) -> !fir.ref> +// CHECK-DAG: %[[VAR_ADDR2:.*]] = fir.convert %[[VAR_ADDR]] : (!fir.ref>) -> !fir.ref +// CHECK-DAG: %[[VAR_NAME2:.*]] = fir.convert %[[VAR_NAME]] : (!fir.ref>) -> !fir.ref +// CHECK-DAG: %[[CST:.*]] = arith.constant 20 : index +// CHECK-DAG %[[CST2:.*]] = fir.convert %[[CST]] : (index) -> i64 +// CHECK fir.call @_FortranACUFRegisterVariable(%[[MODULE2]], %[[VAR_ADDR2]], %[[VAR_NAME2]], %[[CST2]]) : (!fir.ref>, !fir.ref, !fir.ref, i64) -> none From c79827cd15ad31b77702e63e5050c1a8b0b44825 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 29 Oct 2024 12:05:18 -0700 Subject: [PATCH 342/425] [SandboxIR] Fix a warning This patch fixes: llvm/lib/SandboxIR/Context.cpp:684:22: error: unused variable 'MaxRegisteredCallbacks' [-Werror,-Wunused-const-variable] --- llvm/lib/SandboxIR/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp index 5e5cbbbc4515d23..b86ed5864c1ac1d 100644 --- a/llvm/lib/SandboxIR/Context.cpp +++ b/llvm/lib/SandboxIR/Context.cpp @@ -681,7 +681,7 @@ void Context::runMoveInstrCallbacks(Instruction *I, const BBIterator 
&WhereIt) { // An arbitrary limit, to check for accidental misuse. We expect a small number // of callbacks to be registered at a time, but we can increase this number if // we discover we needed more. -static constexpr int MaxRegisteredCallbacks = 16; +[[maybe_unused]] static constexpr int MaxRegisteredCallbacks = 16; Context::CallbackID Context::registerEraseInstrCallback(EraseInstrCallback CB) { assert(EraseInstrCallbacks.size() <= MaxRegisteredCallbacks && From 9cc5a4bf667ffcd2765a6a00a311fb4ec8559b37 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Tue, 29 Oct 2024 12:23:47 -0700 Subject: [PATCH 343/425] Remove llvm::shouldOptForSize() from Utils.h (#112630) Remove `llvm::shouldOptForSize()` from `Utils.h` since we can use `llvm::shouldOptimizeForSize()` from `SizeOpts.h` instead. Depends on https://github.com/llvm/llvm-project/pull/112626 --- .../llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h | 9 +++++++-- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 4 ---- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 5 ----- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h index 7b42722ca8d4f10..b4ff4cd178d7575 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/Function.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include #include #include @@ -635,8 +636,12 @@ class GIMatchTableExecutor { bool shouldOptForSize(const MachineFunction *MF) const { const auto &F = MF->getFunction(); - return F.hasOptSize() || F.hasMinSize() || - (PSI && BFI && CurMBB && llvm::shouldOptForSize(*CurMBB, PSI, BFI)); + if (F.hasOptSize()) + return true; + if (CurMBB) + if (auto *BB = CurMBB->getBasicBlock()) + return llvm::shouldOptimizeForSize(BB, PSI, BFI); + 
return false; } public: diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 95a8234d3c60808..4016247376c4f65 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -542,10 +542,6 @@ bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, /// TargetBooleanContents. int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP); -/// Returns true if the given block should be optimized for size. -bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI); - using SmallInstListTy = GISelWorkList<4>; void saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver, diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 513a49b4fc2e4df..dcbbb0871a8445e 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1619,11 +1619,6 @@ int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, llvm_unreachable("Invalid boolean contents"); } -bool llvm::shouldOptForSize(const MachineBasicBlock &MBB, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { - return llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI); -} - void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver, SmallInstListTy &DeadInstChain) { From a18af41c20ac9ca22e3c95da3d71475f9f6c31b5 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 29 Oct 2024 12:26:33 -0700 Subject: [PATCH 344/425] [LLVM] Change error messages to start with lower case (#113748) Change LLVM Asm and TableGen Lexer/Parser error messages to begin with lower case. 
--- llvm/lib/AsmParser/LLLexer.cpp | 24 +++--- llvm/lib/TableGen/TGLexer.cpp | 73 +++++++++--------- llvm/test/Assembler/invalid-inttype.ll | 2 +- llvm/test/Assembler/invalid-name.ll | Bin 207 -> 209 bytes llvm/test/Assembler/invalid-name2.ll | Bin 185 -> 187 bytes llvm/test/TableGen/64-bit-int.td | 2 +- .../invalid-macro-name-command-line.td | 6 +- llvm/test/TableGen/prep-diag1.td | 8 +- llvm/test/TableGen/prep-diag10.td | 4 +- llvm/test/TableGen/prep-diag11.td | 4 +- llvm/test/TableGen/prep-diag12.td | 4 +- llvm/test/TableGen/prep-diag13.td | 4 +- llvm/test/TableGen/prep-diag14.td | 4 +- llvm/test/TableGen/prep-diag2.td | 4 +- llvm/test/TableGen/prep-diag3.td | 4 +- llvm/test/TableGen/prep-diag4.td | 2 +- llvm/test/TableGen/prep-diag6.td | 2 +- llvm/test/TableGen/prep-diag8.td | 2 +- llvm/test/TableGen/prep-diag9.td | 4 +- llvm/test/TableGen/prep-ifndef-diag-1.td | 2 +- llvm/test/TableGen/prep-ifndef-diag-2.td | 2 +- llvm/test/TableGen/unterminated-c-comment.td | 2 +- llvm/test/TableGen/unterminated-code-block.td | 2 +- 23 files changed, 81 insertions(+), 80 deletions(-) diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 759db6db60774c2..56abd03d6235415 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -60,8 +60,8 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) { uint64_t OldRes = Result; Result *= 10; Result += *Buffer-'0'; - if (Result < OldRes) { // Uh, oh, overflow detected!!! - LexError("constant bigger than 64 bits detected!"); + if (Result < OldRes) { // overflow detected. + LexError("constant bigger than 64 bits detected"); return 0; } } @@ -75,8 +75,8 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { Result *= 16; Result += hexDigitValue(*Buffer); - if (Result < OldRes) { // Uh, oh, overflow detected!!! - LexError("constant bigger than 64 bits detected!"); + if (Result < OldRes) { // overflow detected. 
+ LexError("constant bigger than 64 bits detected"); return 0; } } @@ -99,7 +99,7 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End, Pair[1] += hexDigitValue(*Buffer); } if (Buffer != End) - LexError("constant bigger than 128 bits detected!"); + LexError("constant bigger than 128 bits detected"); } /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into @@ -118,7 +118,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, Pair[0] += hexDigitValue(*Buffer); } if (Buffer != End) - LexError("constant bigger than 128 bits detected!"); + LexError("constant bigger than 128 bits detected"); } // UnEscapeLexed - Run through the specified buffer and change \xx codes to the @@ -292,7 +292,7 @@ lltok::Kind LLLexer::LexDollar() { StrVal.assign(TokStart + 2, CurPtr - 1); UnEscapeLexed(StrVal); if (StringRef(StrVal).contains(0)) { - LexError("Null bytes are not allowed in names"); + LexError("NUL character is not allowed in names"); return lltok::Error; } return lltok::ComdatVar; @@ -354,7 +354,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { uint64_t Val = atoull(TokStart + 1, CurPtr); if ((unsigned)Val != Val) - LexError("invalid value number (too large)!"); + LexError("invalid value number (too large)"); UIntVal = unsigned(Val); return Token; } @@ -375,7 +375,7 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { StrVal.assign(TokStart+2, CurPtr-1); UnEscapeLexed(StrVal); if (StringRef(StrVal).contains(0)) { - LexError("Null bytes are not allowed in names"); + LexError("NUL character is not allowed in names"); return lltok::Error; } return Var; @@ -410,7 +410,7 @@ lltok::Kind LLLexer::LexQuote() { if (CurPtr[0] == ':') { ++CurPtr; if (StringRef(StrVal).contains(0)) { - LexError("Null bytes are not allowed in names"); + LexError("NUL character is not allowed in names"); kind = lltok::Error; } else { kind = lltok::LabelStr; @@ -492,7 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() { uint64_t NumBits = 
atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { - LexError("bitwidth for integer type out of range!"); + LexError("bitwidth for integer type out of range"); return lltok::Error; } TyVal = IntegerType::get(Context, NumBits); @@ -1122,7 +1122,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { uint64_t Val = atoull(TokStart, CurPtr); ++CurPtr; // Skip the colon. if ((unsigned)Val != Val) - LexError("invalid value number (too large)!"); + LexError("invalid value number (too large)"); UIntVal = unsigned(Val); return lltok::LabelID; } diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index 8fe7f69ecf8e593..1e93b2c160ba58e 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -89,7 +89,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef Macros) : SrcMgr(SM) { for (StringRef MacroName : Macros) { const char *End = lexMacroName(MacroName); if (End != MacroName.end()) - PrintFatalError("Invalid macro name `" + MacroName + + PrintFatalError("invalid macro name `" + MacroName + "` specified on command line"); DefinedMacros.insert(MacroName); @@ -188,7 +188,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { return LexIdentifier(); // Unknown character, emit an error. - return ReturnError(TokStart, "Unexpected character"); + return ReturnError(TokStart, "unexpected character"); case EOF: // Lex next token, if we just left an include file. // Note that leaving an include file means that the next @@ -231,7 +231,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { ++CurPtr; // Eat third dot. return tgtok::dotdotdot; } - return ReturnError(TokStart, "Invalid '..' punctuation"); + return ReturnError(TokStart, "invalid '..' punctuation"); } return tgtok::dot; @@ -255,7 +255,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { if (SkipCComment()) return tgtok::Error; } else // Otherwise, this is an error. 
- return ReturnError(TokStart, "Unexpected character"); + return ReturnError(TokStart, "unexpected character"); return LexToken(FileOrLineStart); case '-': case '+': case '0': case '1': case '2': case '3': case '4': case '5': case '6': @@ -313,10 +313,10 @@ tgtok::TokKind TGLexer::LexString() { while (*CurPtr != '"') { // If we hit the end of the buffer, report an error. if (*CurPtr == 0 && CurPtr == CurBuf.end()) - return ReturnError(StrStart, "End of file in string literal"); + return ReturnError(StrStart, "end of file in string literal"); if (*CurPtr == '\n' || *CurPtr == '\r') - return ReturnError(StrStart, "End of line in string literal"); + return ReturnError(StrStart, "end of line in string literal"); if (*CurPtr != '\\') { CurStrVal += *CurPtr++; @@ -346,7 +346,7 @@ tgtok::TokKind TGLexer::LexString() { // If we hit the end of the buffer, report an error. case '\0': if (CurPtr == CurBuf.end()) - return ReturnError(StrStart, "End of file in string literal"); + return ReturnError(StrStart, "end of file in string literal"); [[fallthrough]]; default: return ReturnError(CurPtr, "invalid escape in string literal"); @@ -359,7 +359,7 @@ tgtok::TokKind TGLexer::LexString() { tgtok::TokKind TGLexer::LexVarName() { if (!isValidIDChar(CurPtr[0], /*First=*/true)) - return ReturnError(TokStart, "Invalid variable name"); + return ReturnError(TokStart, "invalid variable name"); // Otherwise, we're ok, consume the rest of the characters. 
const char *VarNameStart = CurPtr++; @@ -433,7 +433,7 @@ bool TGLexer::LexInclude() { tgtok::TokKind Tok = LexToken(); if (Tok == tgtok::Error) return true; if (Tok != tgtok::StrVal) { - PrintError(getLoc(), "Expected filename after include"); + PrintError(getLoc(), "expected filename after include"); return true; } @@ -444,7 +444,7 @@ bool TGLexer::LexInclude() { CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), IncludedFile); if (!CurBuffer) { - PrintError(getLoc(), "Could not find include file '" + Filename + "'"); + PrintError(getLoc(), "could not find include file '" + Filename + "'"); return true; } @@ -476,7 +476,7 @@ bool TGLexer::SkipCComment() { int CurChar = getNextChar(); switch (CurChar) { case EOF: - PrintError(TokStart, "Unterminated comment!"); + PrintError(TokStart, "unterminated comment"); return true; case '*': // End of the comment? @@ -543,7 +543,7 @@ tgtok::TokKind TGLexer::LexNumber() { // Requires at least one digit. if (CurPtr == NumStart) - return ReturnError(TokStart, "Invalid number"); + return ReturnError(TokStart, "invalid number"); errno = 0; if (IsMinus) @@ -552,9 +552,9 @@ tgtok::TokKind TGLexer::LexNumber() { CurIntVal = strtoull(NumStart, nullptr, Base); if (errno == EINVAL) - return ReturnError(TokStart, "Invalid number"); + return ReturnError(TokStart, "invalid number"); if (errno == ERANGE) - return ReturnError(TokStart, "Number out of range"); + return ReturnError(TokStart, "number out of range"); return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal; } @@ -580,13 +580,13 @@ tgtok::TokKind TGLexer::LexBracket() { } } - return ReturnError(CodeStart - 2, "Unterminated code block"); + return ReturnError(CodeStart - 2, "unterminated code block"); } /// LexExclaim - Lex '!' and '![a-zA-Z]+'. 
tgtok::TokKind TGLexer::LexExclaim() { if (!isAlpha(*CurPtr)) - return ReturnError(CurPtr - 1, "Invalid \"!operator\""); + return ReturnError(CurPtr - 1, "invalid \"!operator\""); const char *Start = CurPtr++; while (isAlpha(*CurPtr)) @@ -648,7 +648,8 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("repr", tgtok::XRepr) .Default(tgtok::Error); - return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); + return Kind != tgtok::Error ? Kind + : ReturnError(Start - 1, "unknown operator"); } bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { @@ -662,17 +663,17 @@ bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { // Pop the preprocessing controls from the include stack. if (PrepIncludeStack.empty()) { - PrintFatalError("Preprocessor include stack is empty"); + PrintFatalError("preprocessor include stack is empty"); } PrepIncludeStack.pop_back(); if (IncludeStackMustBeEmpty) { if (!PrepIncludeStack.empty()) - PrintFatalError("Preprocessor include stack is not empty"); + PrintFatalError("preprocessor include stack is not empty"); } else { if (PrepIncludeStack.empty()) - PrintFatalError("Preprocessor include stack is empty"); + PrintFatalError("preprocessor include stack is empty"); } return true; @@ -732,7 +733,7 @@ bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) { return true; } - PrintFatalError("Unsupported preprocessing token in " + PrintFatalError("unsupported preprocessing token in " "prepEatPreprocessorDirective()"); return false; } @@ -748,7 +749,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, StringRef MacroName = prepLexMacroName(); StringRef IfTokName = Kind == tgtok::Ifdef ? 
"#ifdef" : "#ifndef"; if (MacroName.empty()) - return ReturnError(TokStart, "Expected macro name after " + IfTokName); + return ReturnError(TokStart, "expected macro name after " + IfTokName); bool MacroIsDefined = DefinedMacros.count(MacroName) != 0; @@ -763,7 +764,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, {tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)}); if (!prepSkipDirectiveEnd()) - return ReturnError(CurPtr, "Only comments are supported after " + + return ReturnError(CurPtr, "only comments are supported after " + IfTokName + " NAME"); // If we were not processing tokens before this #ifdef, @@ -794,7 +795,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, if (IfdefEntry.Kind != tgtok::Ifdef) { PrintError(TokStart, "double #else"); - return ReturnError(IfdefEntry.SrcPos, "Previous #else is here"); + return ReturnError(IfdefEntry.SrcPos, "previous #else is here"); } // Replace the corresponding #ifdef's control with its negation @@ -804,7 +805,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)}); if (!prepSkipDirectiveEnd()) - return ReturnError(CurPtr, "Only comments are supported after #else"); + return ReturnError(CurPtr, "only comments are supported after #else"); // If we were processing tokens before this #else, // we have to start skipping lines until the matching #endif. 
@@ -827,12 +828,12 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, if (IfdefOrElseEntry.Kind != tgtok::Ifdef && IfdefOrElseEntry.Kind != tgtok::Else) { - PrintFatalError("Invalid preprocessor control on the stack"); + PrintFatalError("invalid preprocessor control on the stack"); return tgtok::Error; } if (!prepSkipDirectiveEnd()) - return ReturnError(CurPtr, "Only comments are supported after #endif"); + return ReturnError(CurPtr, "only comments are supported after #endif"); PrepIncludeStack.back()->pop_back(); @@ -847,15 +848,15 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, } else if (Kind == tgtok::Define) { StringRef MacroName = prepLexMacroName(); if (MacroName.empty()) - return ReturnError(TokStart, "Expected macro name after #define"); + return ReturnError(TokStart, "expected macro name after #define"); if (!DefinedMacros.insert(MacroName).second) PrintWarning(getLoc(), - "Duplicate definition of macro: " + Twine(MacroName)); + "duplicate definition of macro: " + Twine(MacroName)); if (!prepSkipDirectiveEnd()) return ReturnError(CurPtr, - "Only comments are supported after #define NAME"); + "only comments are supported after #define NAME"); if (!ReturnNextLiveToken) { PrintFatalError("#define must be ignored during the lines skipping"); @@ -865,13 +866,13 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, return LexToken(); } - PrintFatalError("Preprocessing directive is not supported"); + PrintFatalError("preprocessing directive is not supported"); return tgtok::Error; } bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { if (!MustNeverBeFalse) - PrintFatalError("Invalid recursion."); + PrintFatalError("invalid recursion."); do { // Skip all symbols to the line end. @@ -917,7 +918,7 @@ bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { // due to #else or #endif. 
if (prepIsProcessingEnabled()) { if (Kind != tgtok::Else && Kind != tgtok::Endif) { - PrintFatalError("Tokens processing was enabled by an unexpected " + PrintFatalError("tokens processing was enabled by an unexpected " "preprocessing directive"); return false; } @@ -1032,7 +1033,7 @@ bool TGLexer::prepSkipDirectiveEnd() { return false; } else { TokStart = CurPtr; - PrintError(CurPtr, "Unexpected character"); + PrintError(CurPtr, "unexpected character"); return false; } @@ -1067,8 +1068,8 @@ void TGLexer::prepReportPreprocessorStackError() { "empty control stack"); auto &PrepControl = PrepIncludeStack.back()->back(); - PrintError(CurBuf.end(), "Reached EOF without matching #endif"); - PrintError(PrepControl.SrcPos, "The latest preprocessor control is here"); + PrintError(CurBuf.end(), "reached EOF without matching #endif"); + PrintError(PrepControl.SrcPos, "the latest preprocessor control is here"); TokStart = CurPtr; } diff --git a/llvm/test/Assembler/invalid-inttype.ll b/llvm/test/Assembler/invalid-inttype.ll index c8aa7c66b79e4dd..9e3c31148af2d6a 100644 --- a/llvm/test/Assembler/invalid-inttype.ll +++ b/llvm/test/Assembler/invalid-inttype.ll @@ -1,5 +1,5 @@ ; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s ; i8388609 is the smallest integer type that can't be represented in LLVM IR -; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range! 
+; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range @i2 = common global i8388609 0, align 4 diff --git a/llvm/test/Assembler/invalid-name.ll b/llvm/test/Assembler/invalid-name.ll index 74133e60df54d595c68c50aea282cd90899f334f..52e2bda3adbabde32ffd154cf1cd8256e0f6ab81 100644 GIT binary patch delta 25 gcmX@lc#&~J3xBAOLUKl8QDSmQYLP-_@x+OC0DD;pkN^Mx delta 23 ecmcb}c%E@W3vX#ojzUspNouh|Vo~bE$#wv3hY1n@ diff --git a/llvm/test/Assembler/invalid-name2.ll b/llvm/test/Assembler/invalid-name2.ll index 8a848798a54cafef9c3c151b08f70955e59fa62f..78da4dc3d1b8d04c34b049c222d581088f506e7b 100644 GIT binary patch delta 25 gcmdnVxSMf83xBAOLUKl8QDSmQYLP-_@x+N50Cyt^P5=M^ delta 23 ecmdnZxRY^03vX#ojzUspNouh|Vo~bE$r=D@sR+;j diff --git a/llvm/test/TableGen/64-bit-int.td b/llvm/test/TableGen/64-bit-int.td index 2d2bdb8b560e2c3..d2a2999c14e9913 100644 --- a/llvm/test/TableGen/64-bit-int.td +++ b/llvm/test/TableGen/64-bit-int.td @@ -16,7 +16,7 @@ def { #ifdef OOR3 bits<64> Val = 0x10000000000000000; #endif -// CHECK-OOR: error: Number out of range +// CHECK-OOR: error: number out of range bits<64> BinVal0 = 0x8000000000000000; bits<64> HexVal0 = 0b1000000000000000000000000000000000000000000000000000000000000000; diff --git a/llvm/test/TableGen/invalid-macro-name-command-line.td b/llvm/test/TableGen/invalid-macro-name-command-line.td index 0d2307997ebe545..7d19e8996639af8 100644 --- a/llvm/test/TableGen/invalid-macro-name-command-line.td +++ b/llvm/test/TableGen/invalid-macro-name-command-line.td @@ -3,7 +3,7 @@ // RUN: not llvm-tblgen %s -D_MAC# 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-3 // RUN: not llvm-tblgen %s -D 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-4 -// CHECK-TEST-1: error: Invalid macro name `MACRO=1` specified on command line -// CHECK-TEST-2: error: Invalid macro name `0MAC` specified on command line -// CHECK-TEST-3: error: Invalid macro name `_MAC#` specified on command line +// CHECK-TEST-1: error: invalid macro name `MACRO=1` specified on 
command line +// CHECK-TEST-2: error: invalid macro name `0MAC` specified on command line +// CHECK-TEST-3: error: invalid macro name `_MAC#` specified on command line // CHECK-TEST-4: for the -D option: requires a value! diff --git a/llvm/test/TableGen/prep-diag1.td b/llvm/test/TableGen/prep-diag1.td index 41b7d477c6942e3..27f428f4fe95989 100644 --- a/llvm/test/TableGen/prep-diag1.td +++ b/llvm/test/TableGen/prep-diag1.td @@ -4,22 +4,22 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG3 %s #ifdef DIAG1 -// DIAG1: error: Only comments are supported after #define NAME +// DIAG1: error: only comments are supported after #define NAME #define ENABLED1/* */class C; #endif // DIAG1 #ifdef DIAG4 -// DIAG4: warning: Duplicate definition of macro: ENABLED1 +// DIAG4: warning: duplicate definition of macro: ENABLED1 #define ENABLED1 #define ENABLED1 #endif // DIAG4 #ifdef DIAG2 -// DIAG2: error: Only comments are supported after #ifdef NAME +// DIAG2: error: only comments are supported after #ifdef NAME // Invalid #ifdef below should be detected even if DIAG2 is not defined. 
-// DIAG3: error: Only comments are supported after #ifdef NAME +// DIAG3: error: only comments are supported after #ifdef NAME #ifdef DIAG2/* */class C; #endif diff --git a/llvm/test/TableGen/prep-diag10.td b/llvm/test/TableGen/prep-diag10.td index eb387a07b066ca4..cfcbab094ad73b7 100644 --- a/llvm/test/TableGen/prep-diag10.td +++ b/llvm/test/TableGen/prep-diag10.td @@ -1,6 +1,6 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED #else diff --git a/llvm/test/TableGen/prep-diag11.td b/llvm/test/TableGen/prep-diag11.td index 0042bc04f9e1014..1fe8a8503076e5f 100644 --- a/llvm/test/TableGen/prep-diag11.td +++ b/llvm/test/TableGen/prep-diag11.td @@ -1,7 +1,7 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED #else #define ENABLED diff --git a/llvm/test/TableGen/prep-diag12.td b/llvm/test/TableGen/prep-diag12.td index c26301ee17ac2b1..02ffa672b2fa050 100644 --- a/llvm/test/TableGen/prep-diag12.td +++ b/llvm/test/TableGen/prep-diag12.td @@ -1,7 +1,7 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED #else #define ENABLED diff --git a/llvm/test/TableGen/prep-diag13.td b/llvm/test/TableGen/prep-diag13.td index aa3fdab4802d379..733a46a16181318 100644 --- a/llvm/test/TableGen/prep-diag13.td +++ b/llvm/test/TableGen/prep-diag13.td @@ -1,7 +1,7 @@ // RUN: 
not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED /* #else diff --git a/llvm/test/TableGen/prep-diag14.td b/llvm/test/TableGen/prep-diag14.td index cae9bc3b7f5b6c8..a3216ee4f471251 100644 --- a/llvm/test/TableGen/prep-diag14.td +++ b/llvm/test/TableGen/prep-diag14.td @@ -1,6 +1,6 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED // #endif diff --git a/llvm/test/TableGen/prep-diag2.td b/llvm/test/TableGen/prep-diag2.td index 741026b9c8a2d64..e51490600ff64f5 100644 --- a/llvm/test/TableGen/prep-diag2.td +++ b/llvm/test/TableGen/prep-diag2.td @@ -2,10 +2,10 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s #ifdef DIAG1 -// DIAG1: error: Only comments are supported after #else +// DIAG1: error: only comments are supported after #else // Invalid #else below should be detected even if DIAG1 is not defined. -// DIAG2: error: Only comments are supported after #else +// DIAG2: error: only comments are supported after #else #ifdef DIAG2//DIAG2 #else/* */class C; diff --git a/llvm/test/TableGen/prep-diag3.td b/llvm/test/TableGen/prep-diag3.td index fbedfa290b9947d..0b4d40307b40b06 100644 --- a/llvm/test/TableGen/prep-diag3.td +++ b/llvm/test/TableGen/prep-diag3.td @@ -2,10 +2,10 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s #ifdef DIAG1 -// DIAG1: error: Only comments are supported after #endif +// DIAG1: error: only comments are supported after #endif // Invalid #else below should be detected even if DIAG1 is not defined. 
-// DIAG2: error: Only comments are supported after #endif +// DIAG2: error: only comments are supported after #endif #ifdef DIAG2//DIAG2 #else/*!DIAG2*/ #endif/* !DIAG2 diff --git a/llvm/test/TableGen/prep-diag4.td b/llvm/test/TableGen/prep-diag4.td index 4661ef8667d23fc..ead116ebde0de8e 100644 --- a/llvm/test/TableGen/prep-diag4.td +++ b/llvm/test/TableGen/prep-diag4.td @@ -1,7 +1,7 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s // CHECK: error: double #else -// CHECK: error: Previous #else is here +// CHECK: error: previous #else is here #ifdef DIAG1 #else #else diff --git a/llvm/test/TableGen/prep-diag6.td b/llvm/test/TableGen/prep-diag6.td index f4202d115da59a4..bf1cd3d3490b5e2 100644 --- a/llvm/test/TableGen/prep-diag6.td +++ b/llvm/test/TableGen/prep-diag6.td @@ -1,6 +1,6 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Expected macro name after #ifdef +// CHECK: error: expected macro name after #ifdef #ifdef #else #else diff --git a/llvm/test/TableGen/prep-diag8.td b/llvm/test/TableGen/prep-diag8.td index 7a7bde62c79c4e1..82797d6cf4a62d8 100644 --- a/llvm/test/TableGen/prep-diag8.td +++ b/llvm/test/TableGen/prep-diag8.td @@ -1,5 +1,5 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Expected macro name after #define +// CHECK: error: expected macro name after #define #define #endif diff --git a/llvm/test/TableGen/prep-diag9.td b/llvm/test/TableGen/prep-diag9.td index 4ecff575cdc7bbd..6ad208104301bc4 100644 --- a/llvm/test/TableGen/prep-diag9.td +++ b/llvm/test/TableGen/prep-diag9.td @@ -1,5 +1,5 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED diff --git a/llvm/test/TableGen/prep-ifndef-diag-1.td b/llvm/test/TableGen/prep-ifndef-diag-1.td index 
941f2d377a98a74..4a0d0754ed79063 100644 --- a/llvm/test/TableGen/prep-ifndef-diag-1.td +++ b/llvm/test/TableGen/prep-ifndef-diag-1.td @@ -1,4 +1,4 @@ // RUN: not llvm-tblgen %s 2>&1 | FileCheck %s -// CHECK: error: Expected macro name after #ifndef +// CHECK: error: expected macro name after #ifndef #ifndef 1 diff --git a/llvm/test/TableGen/prep-ifndef-diag-2.td b/llvm/test/TableGen/prep-ifndef-diag-2.td index 7b5f9dfd24b7861..c89cbab08e5c5cd 100644 --- a/llvm/test/TableGen/prep-ifndef-diag-2.td +++ b/llvm/test/TableGen/prep-ifndef-diag-2.td @@ -1,4 +1,4 @@ // RUN: not llvm-tblgen %s 2>&1 | FileCheck %s -// CHECK: error: Only comments are supported after #ifndef NAME +// CHECK: error: only comments are supported after #ifndef NAME #ifndef MACRO 42 diff --git a/llvm/test/TableGen/unterminated-c-comment.td b/llvm/test/TableGen/unterminated-c-comment.td index 0f4cd9d633c66d9..b5b995342be744a 100644 --- a/llvm/test/TableGen/unterminated-c-comment.td +++ b/llvm/test/TableGen/unterminated-c-comment.td @@ -1,5 +1,5 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Unterminated comment! +// CHECK: error: unterminated comment include "unterminated-c-comment-include.inc" */ diff --git a/llvm/test/TableGen/unterminated-code-block.td b/llvm/test/TableGen/unterminated-code-block.td index d6b6f50827a6725..5bd4cd7e17d827c 100644 --- a/llvm/test/TableGen/unterminated-code-block.td +++ b/llvm/test/TableGen/unterminated-code-block.td @@ -1,5 +1,5 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Unterminated code block +// CHECK: error: unterminated code block include "unterminated-code-block-include.inc" }]>; From 3754fc1e9af38951aa00181c0e8110174d3f94fd Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 29 Oct 2024 12:38:56 -0700 Subject: [PATCH 345/425] [hwasan] Flush stderr/stdout in tests (#114083) The x86_64_lam_qemu buildbots started failing (https://lab.llvm.org/buildbot/#/builders/139/builds/5462/steps/2/logs/stdio). 
Based on the logs, it appears the HWASan check is correct but it did not match the stderr/stdout output. This patch attempts to fix the issue by flushing stderr/stdout as appropriate. --- compiler-rt/test/hwasan/TestCases/many-threads-uaf.c | 1 + compiler-rt/test/hwasan/TestCases/mem-intrinsics.c | 1 + compiler-rt/test/hwasan/TestCases/use-after-free.c | 1 + 3 files changed, 3 insertions(+) diff --git a/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c b/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c index 8fa07861371d56d..e02ab5b28ce046c 100644 --- a/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c +++ b/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c @@ -23,6 +23,7 @@ void *BoringThread(void *arg) { void *UAFThread(void *arg) { char * volatile x = (char*)malloc(10); fprintf(stderr, "ZZZ %p\n", x); + fflush(stderr); free(x); x[5] = 42; // CHECK: ERROR: HWAddressSanitizer: tag-mismatch on address diff --git a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c index 78bef538af11610..da1cb6869692066 100644 --- a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c +++ b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c @@ -21,6 +21,7 @@ int main() { memcpy(Q, P, 32); #endif write(STDOUT_FILENO, "recovered\n", 10); + fflush(stdout); // WRITE: ERROR: HWAddressSanitizer: tag-mismatch on address // WRITE: WRITE of size 32 at {{.*}} tags: [[PTR_TAG:..]]/[[MEM_TAG:..]] (ptr/mem) // WRITE: Invalid access starting at offset 16 diff --git a/compiler-rt/test/hwasan/TestCases/use-after-free.c b/compiler-rt/test/hwasan/TestCases/use-after-free.c index 070622f560a2255..b4b79875e8111eb 100644 --- a/compiler-rt/test/hwasan/TestCases/use-after-free.c +++ b/compiler-rt/test/hwasan/TestCases/use-after-free.c @@ -15,6 +15,7 @@ int main() { free(x); __hwasan_disable_allocator_tagging(); fprintf(stderr, ISREAD ? 
"Going to do a READ\n" : "Going to do a WRITE\n"); + fflush(stderr); // CHECK: Going to do a [[TYPE:[A-Z]*]] int r = 0; if (ISREAD) r = x[5]; else x[5] = 42; // should be on the same line. From 3a1228a543bc85e225809b1f3033fac744f1f122 Mon Sep 17 00:00:00 2001 From: Adam Yang Date: Tue, 29 Oct 2024 12:40:01 -0700 Subject: [PATCH 346/425] [SPIRV] Add GroupMemoryBarrierWithGroupSync intrinsic (#111888) partially fixes #70103 ### Changes * Added int_spv_group_memory_barrier_with_group_sync intrinsic in IntrinsicsSPIRV.td * Added lowering for int_spv_group_memory_barrier_with_group_sync in SPIRVInstructionSelector.cpp * Added SPIRV backend test case ### Related PRs * [[clang][HLSL] Add GroupMemoryBarrierWithGroupSync intrinsic #111883](https://github.com/llvm/llvm-project/pull/111883) * [[DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic #111884](https://github.com/llvm/llvm-project/pull/111884) --- llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 + llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 11 +++++++++++ .../group_memory_barrier_with_group_sync.ll | 14 ++++++++++++++ 3 files changed, 26 insertions(+) create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 6df2eb156a07749..ddb47390537412a 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -87,6 +87,7 @@ let TargetPrefix = "spv" in { def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>; def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; + def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>; // Create resource handle given the binding information. 
Returns a // type appropriate for the kind of resource given the set id, binding id, diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index d9377fe4b91a1ad..11ed7d660be09e7 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -2547,6 +2547,17 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_group_memory_barrier_with_group_sync: { + Register MemSemReg = + buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I); + Register ScopeReg = buildI32Constant(SPIRV::Scope::Workgroup, I); + MachineBasicBlock &BB = *I.getParent(); + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpControlBarrier)) + .addUse(ScopeReg) + .addUse(ScopeReg) + .addUse(MemSemReg) + .constrainAllUses(TII, TRI, RBI); + } break; case Intrinsic::spv_lifetime_start: case Intrinsic::spv_lifetime_end: { unsigned Op = IID == Intrinsic::spv_lifetime_start ? 
SPIRV::OpLifetimeStart diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll new file mode 100644 index 000000000000000..6955411a0e4e99b --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll @@ -0,0 +1,14 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpMemoryModel Logical GLSL450 + +define void @test_group_memory_barrier_with_group_sync() { +entry: + ; CHECK: %[[#TY:]] = OpTypeInt 32 0 + ; CHECK-DAG: %[[#MEM_SEM:]] = OpConstant %[[#TY]] 16 + ; CHECK-DAG: %[[#EXEC_AND_MEM_SCOPE:]] = OpConstant %[[#TY]] 2 + ; CHECK: OpControlBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]] + call void @llvm.spv.group.memory.barrier.with.group.sync() + ret void +} From e205929399d9ee4782b2d8ef1b659f918bdfe7c2 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 29 Oct 2024 12:40:54 -0700 Subject: [PATCH 347/425] [asan] Flush stderr in test (#114084) This is the ASan equivalent of https://github.com/llvm/llvm-project/pull/114083. The x86_64_lam_qemu buildbots started failing (https://lab.llvm.org/buildbot/#/builders/139/builds/5462/steps/2/logs/stdio). Based on the logs, it appears the ASan check is correct but it did not match the stderr/stdout output. This patch attempts to fix the issue by flushing stderr as appropriate. 
--- compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp b/compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp index 87be90014d56e82..dfeb8ad5c7b53fa 100644 --- a/compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp @@ -26,14 +26,17 @@ bool ignore_free = false; extern "C" { WEAK_ON_APPLE void __sanitizer_free_hook(const volatile void *ptr) { - if (ptr == glob_ptr) + if (ptr == glob_ptr) { fprintf(stderr, "Free Hook\n"); + fflush(stderr); + } } WEAK_ON_APPLE int __sanitizer_ignore_free_hook(const volatile void *ptr) { if (ptr != glob_ptr) return 0; fprintf(stderr, ignore_free ? "Free Ignored\n" : "Free Respected\n"); + fflush(stderr); return ignore_free; } } // extern "C" From 8a0cb9ac869334fd6c6bd6aad8408623a7ccd7f6 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas Date: Tue, 29 Oct 2024 15:43:05 -0400 Subject: [PATCH 348/425] [PowerPC] Add custom lowering for ssubo (#111748) This patch is to improve the codegen for ssubo node for i32 in 64-bit mode by custom lowering. --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 37 +++++++++++++++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 + llvm/test/CodeGen/PowerPC/saddo-ssubo.ll | 11 +++--- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ab31898e262e7ed..d8f3095ed7fb68a 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -200,6 +200,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom); + // On P10, the default lowering generates better code using the + // setbc instruction. 
+ if (!Subtarget.hasP10Vector() && isPPC64) + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + // Match BITREVERSE to customized fast code sequence in the td file. setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); @@ -12016,6 +12021,36 @@ SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const { return Res; } +SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const { + + SDLoc dl(Op); + + SDValue LHS64 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Op.getOperand(0)); + SDValue RHS64 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Op.getOperand(1)); + + SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i64, LHS64, RHS64); + + SDValue Extsw = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, Sub, + DAG.getValueType(MVT::i32)); + + SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i64, Extsw, Sub); + + SDValue Addic = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(MVT::i64, MVT::Glue), + Xor, DAG.getConstant(-1, dl, MVT::i64)); + + SDValue Overflow = + DAG.getNode(ISD::SUBE, dl, DAG.getVTList(MVT::i64, MVT::Glue), Xor, Addic, + Addic.getValue(1)); + + SDValue OverflowTrunc = + DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow); + SDValue SubTrunc = + (Sub->getValueType(0) != Op.getNode()->getValueType(0)) + ? DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(0), Sub) + : Sub; + return DAG.getMergeValues({SubTrunc, OverflowTrunc}, dl); +} + /// LowerOperation - Provide custom lowering hooks for some operations. 
/// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -12038,6 +12073,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::SSUBO: + return LowerSSUBO(Op, DAG); case ISD::INLINEASM: case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 0adbad868459731..dde45e4cf6f4ae6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1279,6 +1279,7 @@ namespace llvm { SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll index fd5f26ba35742f3..7147257d27c4b8b 100644 --- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll +++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll @@ -129,12 +129,11 @@ entry: define i1 @test_ssubo_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_ssubo_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sub 5, 3, 4 -; CHECK-NEXT: cmpwi 1, 4, 0 -; CHECK-NEXT: cmpw 5, 3 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: creqv 20, 5, 0 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: sub 3, 3, 4 +; CHECK-NEXT: extsw 4, 3 +; CHECK-NEXT: xor 3, 4, 3 +; CHECK-NEXT: addic 4, 3, -1 +; CHECK-NEXT: subfe 3, 4, 3 ; CHECK-NEXT: blr entry: %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind From 
27ef549af2c2f60d05f38db1ecc7a8ad7294351d Mon Sep 17 00:00:00 2001 From: z1nke Date: Wed, 30 Oct 2024 03:48:39 +0800 Subject: [PATCH 349/425] [clang-tidy] Fix crash in modernize-use-designated-initializers check (#113688) Fix #113652. When calling `Node.isAggregate()` and `Node.isPOD()`, if `Node` is declared but not defined, it will result in null pointer dereference (and if assertions are enabled, it will cause an assertion failure). --- .../modernize/UseDesignatedInitializersCheck.cpp | 8 ++++++-- clang-tools-extra/docs/ReleaseNotes.rst | 4 ++++ .../checkers/modernize/use-designated-initializers.cpp | 8 ++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp index 2a0cc403b726e8d..3132067f3d5ece6 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp @@ -80,9 +80,13 @@ unsigned getNumberOfDesignated(const InitListExpr *SyntacticInitList) { }); } -AST_MATCHER(CXXRecordDecl, isAggregate) { return Node.isAggregate(); } +AST_MATCHER(CXXRecordDecl, isAggregate) { + return Node.hasDefinition() && Node.isAggregate(); +} -AST_MATCHER(CXXRecordDecl, isPOD) { return Node.isPOD(); } +AST_MATCHER(CXXRecordDecl, isPOD) { + return Node.hasDefinition() && Node.isPOD(); +} AST_MATCHER(InitListExpr, isFullyDesignated) { if (const InitListExpr *SyntacticForm = diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 54118e5f92f4173..ccebf74e8a67e73 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -216,6 +216,10 @@ Changes in existing checks a false positive when only an implicit conversion happened inside an initializer list. 
+- Improved :doc:`modernize-use-designated-initializers + ` check to fix a + crash when a class is declared but not defined. + - Improved :doc:`modernize-use-nullptr ` check to also recognize ``NULL``/``__null`` (but not ``0``) when used with a templated type. diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp index 9b769ad0be23cab..048665b2e54ac5a 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp @@ -201,3 +201,11 @@ DECLARE_S93; // CHECK-MESSAGES-MACROS: :[[@LINE-1]]:1: warning: use designated initializer list to initialize 'S9' [modernize-use-designated-initializers] // CHECK-MESSAGES-MACROS: :[[@LINE-4]]:28: note: expanded from macro 'DECLARE_S93' // CHECK-MESSAGES-MACROS: :[[@LINE-71]]:1: note: aggregate type is defined here + +// Issue #113652. +struct S14; + +struct S15{ + S15(S14& d):d{d}{} + S14& d; +}; From 5c12434906d85dde4d44036cfb564fd366d9a1a4 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Tue, 29 Oct 2024 03:46:04 +0000 Subject: [PATCH 350/425] [X86] Emit comments explaining the immediate in vfpclass This makes the assembly a lot more readable at a glance. 
As an example: ``` vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ``` --- .../X86/MCTargetDesc/X86InstComments.cpp | 81 +++++++++++++++++++ .../CodeGen/X86/avx10_2_512bf16-intrinsics.ll | 10 ++- .../CodeGen/X86/avx10_2bf16-intrinsics.ll | 2 + .../X86/avx512dq-intrinsics-fast-isel.ll | 24 +++--- .../X86/avx512dq-intrinsics-upgrade.ll | 4 + llvm/test/CodeGen/X86/avx512dq-intrinsics.ll | 12 +++ .../X86/avx512dqvl-intrinsics-fast-isel.ll | 12 +-- .../X86/avx512dqvl-intrinsics-upgrade.ll | 4 + .../test/CodeGen/X86/avx512dqvl-intrinsics.ll | 4 + .../X86/stack-folding-fp-avx512fp16.ll | 4 + .../X86/stack-folding-fp-avx512fp16vl.ll | 2 + 11 files changed, 137 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index 587f923e789f02f..49e8bab4c0363da 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -40,6 +40,20 @@ using namespace llvm; CASE_MASK_INS_COMMON(Inst, Suffix, src) \ CASE_MASKZ_INS_COMMON(Inst, Suffix, src) +#define CASE_FPCLASS_PACKED(Inst, src) \ + CASE_AVX_INS_COMMON(Inst, Z, r##src) \ + CASE_AVX_INS_COMMON(Inst, Z256, r##src) \ + CASE_AVX_INS_COMMON(Inst, Z128, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) + +#define CASE_FPCLASS_PACKED_MEM(Inst) \ + CASE_FPCLASS_PACKED(Inst, m) \ + CASE_FPCLASS_PACKED(Inst, mb) + +#define CASE_FPCLASS_SCALAR(Inst, src) \ + CASE_AVX_INS_COMMON(Inst, Z, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) + #define CASE_PTERNLOG(Inst, src) \ CASE_AVX512_INS_COMMON(Inst, Z, r##src##i) \ CASE_AVX512_INS_COMMON(Inst, Z256, r##src##i) \ @@ -949,6 +963,70 @@ static bool printPTERNLOGComments(const MCInst *MI, raw_ostream &OS, return true; } +static bool printFPCLASSComments(const MCInst *MI, raw_ostream &OS, + const MCInstrInfo &MCII) { + unsigned NumOperands = MI->getNumOperands(); + int SrcIdx; + switch (MI->getOpcode()) { + 
CASE_FPCLASS_PACKED(FPCLASSPBF16, r) + CASE_FPCLASS_PACKED(FPCLASSPH, r) + CASE_FPCLASS_PACKED(FPCLASSPS, r) + CASE_FPCLASS_PACKED(FPCLASSPD, r) + CASE_FPCLASS_SCALAR(FPCLASSSH, r) + CASE_FPCLASS_SCALAR(FPCLASSSS, r) + CASE_FPCLASS_SCALAR(FPCLASSSD, r) { + SrcIdx = NumOperands - 2; + break; + } + CASE_FPCLASS_PACKED_MEM(FPCLASSPBF16) + CASE_FPCLASS_PACKED_MEM(FPCLASSPH) + CASE_FPCLASS_PACKED_MEM(FPCLASSPS) + CASE_FPCLASS_PACKED_MEM(FPCLASSPD) + CASE_FPCLASS_SCALAR(FPCLASSSH, m) + CASE_FPCLASS_SCALAR(FPCLASSSS, m) + CASE_FPCLASS_SCALAR(FPCLASSSD, m) { + SrcIdx = -1; + break; + } + default: + return false; + } + StringRef DestName = getRegName(MI->getOperand(0).getReg()); + StringRef SrcName = + SrcIdx != -1 ? getRegName(MI->getOperand(SrcIdx).getReg()) : "mem"; + + OS << DestName; + printMasking(OS, MI, MCII); + OS << " = "; + + uint8_t Categories = MI->getOperand(NumOperands - 1).getImm(); + if (Categories == 0) { + OS << "false"; + } else { + static constexpr StringLiteral CategoryNames[] = { + "QuietNaN", + "PositiveZero", + "NegativeZero", + "PositiveInfinity", + "NegativeInfinity", + "Subnormal", + "Negative", + "SignalingNaN", + }; + bool Conjoin = false; + for (size_t I = 0, E = std::size(CategoryNames); I != E; ++I) { + if (Categories & (1 << I)) { + if (Conjoin) + OS << " | "; + Conjoin = true; + OS << "is" << CategoryNames[I] << '(' << SrcName << ')'; + } + } + } + OS << '\n'; + return true; +} + //===----------------------------------------------------------------------===// // Top Level Entrypoint //===----------------------------------------------------------------------===// @@ -970,6 +1048,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, if (printPTERNLOGComments(MI, OS, MCII)) return true; + if (printFPCLASSComments(MI, OS, MCII)) + return true; + switch (MI->getOpcode()) { default: // Not an instruction for which we can decode comments. 
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll index 7b81d547db085c5..5f2bcf0556b021e 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll @@ -76,13 +76,15 @@ declare <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat>, i32) define i32 @test_int_x86_avx512_fpclass_nepbf16_512(<32 x bfloat> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vfpclasspbf16 $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x02] -; CHECK-NEXT: vfpclasspbf16 $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: vfpclasspbf16 $6, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x06] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) | isNegativeZero(zmm0) +; CHECK-NEXT: vfpclasspbf16 $0, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x00] +; CHECK-NEXT: # k0 {%k1} = false ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 4) - %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 2) + %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 0) + %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 6) %1 = and <32 x i1> %res1, %res %2 = bitcast <32 x i1> %1 to i32 ret i32 %2 diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll index 559d866b55cc7b6..59151d4dd96099e 100644 --- a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll @@ -298,6 +298,7 @@ define i8 @test_int_x86_avx512_fpclass_nepbf16_128(<8 x bfloat> %x0) { ; CHECK-LABEL: 
test_int_x86_avx512_fpclass_nepbf16_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspbf16 $2, %xmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x08,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(xmm0) ; CHECK-NEXT: vfpclasspbf16 $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x66,0xc0,0x04] ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -313,6 +314,7 @@ define i16 @test_int_x86_avx512_fpclass_nepbf16_256(<16 x bfloat> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspbf16 $2, %ymm0, %k1 # encoding: [0x62,0xf3,0x7f,0x28,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(ymm0) ; CHECK-NEXT: vfpclasspbf16 $4, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x66,0xc0,0x04] ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll index 64063bdf8333eb5..53193597d62f08f 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll @@ -7,7 +7,7 @@ define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double> %__A) { ; X86-LABEL: test_mm512_mask_fpclass_pd_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: vfpclasspd $4, %zmm0, %k0 +; X86-NEXT: vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -16,7 +16,7 @@ define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double> ; ; X64-LABEL: test_mm512_mask_fpclass_pd_mask: ; X64: # %bb.0: # %entry -; X64-NEXT: vfpclasspd $4, %zmm0, %k0 +; X64-NEXT: vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: andb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed 
$eax @@ -35,7 +35,7 @@ declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32) define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) { ; CHECK-LABEL: test_mm512_fpclass_pd_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 +; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper @@ -49,7 +49,7 @@ entry: define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x float> %__A) { ; X86-LABEL: test_mm512_mask_fpclass_ps_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: vfpclassps $4, %zmm0, %k0 +; X86-NEXT: vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -58,7 +58,7 @@ define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x floa ; ; X64-LABEL: test_mm512_mask_fpclass_ps_mask: ; X64: # %bb.0: # %entry -; X64-NEXT: vfpclassps $4, %zmm0, %k0 +; X64-NEXT: vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax @@ -77,7 +77,7 @@ declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32) define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) { ; CHECK-LABEL: test_mm512_fpclass_ps_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 +; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper @@ -91,7 +91,7 @@ entry: define zeroext i8 @test_mm_fpclass_sd_mask(<4 x float> %__A) { ; CHECK-LABEL: test_mm_fpclass_sd_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 +; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # 
kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} @@ -107,7 +107,7 @@ define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__ ; X86-LABEL: test_mm_mask_fpclass_sd_mask: ; X86: # %bb.0: # %entry ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} +; X86-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl @@ -115,7 +115,7 @@ define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__ ; X64-LABEL: test_mm_mask_fpclass_sd_mask: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} +; X64-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -128,7 +128,7 @@ entry: define zeroext i8 @test_mm_fpclass_ss_mask(<4 x float> %__A) { ; CHECK-LABEL: test_mm_fpclass_ss_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclassss $2, %xmm0, %k0 +; CHECK-NEXT: vfpclassss $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} @@ -143,7 +143,7 @@ define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__ ; X86-LABEL: test_mm_mask_fpclass_ss_mask: ; X86: # %bb.0: # %entry ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} +; X86-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl @@ -151,7 +151,7 @@ define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__ ; X64-LABEL: test_mm_mask_fpclass_ss_mask: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} +; X64-NEXT: vfpclassss $2, %xmm0, %k0 
{%k1} # k0 {%k1} = isPositiveZero(xmm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll index 23e929aa9d89b1c..8a0428d022b6d7f 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -654,7 +654,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) ; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -669,7 +671,9 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) ; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll index 027bca9c8badf9c..70f60c802a2d521 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -726,7 +726,9 @@ define i8 @test_int_x86_avx512_fpclass_pd_512(<8 x double> %x0) { ; CHECK-LABEL: 
test_int_x86_avx512_fpclass_pd_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) ; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -743,7 +745,9 @@ define i16@test_int_x86_avx512_fpclass_ps_512(<16 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) ; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -761,7 +765,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasssd $4, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(xmm0) ; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x67,0xc0,0x02] +; CHECK-NEXT: # k0 {%k1} = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] @@ -775,6 +781,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd_load(ptr %x0ptr) { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vfpclasssd $4, (%eax), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x00,0x04] +; X86-NEXT: # k0 = 
isNegativeZero(mem) ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl # encoding: [0xc3] @@ -782,6 +789,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd_load(ptr %x0ptr) { ; X64-LABEL: test_int_x86_avx512_mask_fpclass_sd_load: ; X64: # %bb.0: ; X64-NEXT: vfpclasssd $4, (%rdi), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x07,0x04] +; X64-NEXT: # k0 = isNegativeZero(mem) ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq # encoding: [0xc3] @@ -796,7 +804,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassss $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(xmm0) ; CHECK-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x67,0xc0,0x02] +; CHECK-NEXT: # k0 {%k1} = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] @@ -810,6 +820,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(ptr %x0ptr, i8 %x1) { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vfpclassss $4, (%eax), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x00,0x04] +; X86-NEXT: # k0 = isNegativeZero(mem) ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl # encoding: [0xc3] @@ -817,6 +828,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(ptr %x0ptr, i8 %x1) { ; X64-LABEL: test_int_x86_avx512_mask_fpclass_ss_load: ; X64: # %bb.0: ; X64-NEXT: vfpclassss $4, (%rdi), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x07,0x04] +; X64-NEXT: # k0 = isNegativeZero(mem) ; X64-NEXT: kmovw %k0, %eax # encoding: 
[0xc5,0xf8,0x93,0xc0] ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll index 703591acef57207..a8a38d9c4811331 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll @@ -235,7 +235,7 @@ declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32) define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) { ; CHECK-LABEL: test_mm_fpclass_pd_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0 +; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} @@ -279,7 +279,7 @@ declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32) define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) { ; CHECK-LABEL: test_mm256_fpclass_pd_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 +; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper @@ -322,7 +322,7 @@ declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32) define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) { ; CHECK-LABEL: test_mm_fpclass_ps_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 +; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} @@ -336,7 +336,7 @@ entry: define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_mask_fpclass_ps_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: vfpclassps $2, %ymm0, %k0 +; X86-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = 
isPositiveZero(ymm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -345,7 +345,7 @@ define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> ; ; X64-LABEL: test_mm256_mask_fpclass_ps_mask: ; X64: # %bb.0: # %entry -; X64-NEXT: vfpclassps $2, %ymm0, %k0 +; X64-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: andb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -364,7 +364,7 @@ declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32) define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) { ; CHECK-LABEL: test_mm256_fpclass_ps_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 +; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll index 8d609eb7fdd0092..f31dafcd6862695 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -2921,6 +2921,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(xmm0) ; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -2936,6 +2937,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: 
[0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(ymm0) ; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -2952,6 +2954,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(xmm0) ; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -2967,6 +2970,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(ymm0) ; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 3b9f96ef452c323..ec94b593148dfac 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -1500,6 +1500,7 @@ define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(xmm0) ; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: 
[0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -1518,6 +1519,7 @@ define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(ymm0) ; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -1536,6 +1538,7 @@ define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(xmm0) ; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -1554,6 +1557,7 @@ define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(ymm0) ; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll index ed7109c416e7fda..9382ba31ab649d6 100644 --- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll +++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll @@ -234,6 +234,7 @@ define i32 @stack_fold_fpclassph(<32 x half> %a0) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vfpclassphz 
$4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 64-byte Folded Reload +; CHECK-NEXT: # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -253,6 +254,7 @@ define i32 @stack_fold_fpclassph_mask(<32 x half> %a0, ptr %p) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovd (%rdi), %k1 ; CHECK-NEXT: vfpclassphz $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 {%k1} # 64-byte Folded Reload +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -272,6 +274,7 @@ define i8 @stack_fold_fpclasssh(<8 x half> %a0) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vfpclasssh $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 16-byte Folded Reload +; CHECK-NEXT: # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -290,6 +293,7 @@ define i8 @stack_fold_fpclasssh_mask(<8 x half> %a0, ptr %p) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovb (%rdi), %k1 ; CHECK-NEXT: vfpclasssh $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 {%k1} # 16-byte Folded Reload +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll index e2ed997783f59b8..3386f4a9b519813 100644 --- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll +++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll @@ -110,6 +110,7 @@ define i8 @stack_fold_fpclassph(<8 x half> %a0) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vfpclassphx $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 16-byte Folded Reload +; CHECK-NEXT: # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -148,6 +149,7 @@ define i16 @stack_fold_fpclassph_ymm(<16 x half> %a0) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: 
vfpclassphy $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 32-byte Folded Reload +; CHECK-NEXT: # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper From ba8d9ce8d4f0665f29bb4bb43ce16d02acaed751 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Tue, 29 Oct 2024 13:00:59 -0700 Subject: [PATCH 351/425] [ADT] Fix unused variable from #69528 (#114114) Remove unused variable to fix build failures from bot. --- llvm/lib/Support/TrieRawHashMap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/TrieRawHashMap.cpp b/llvm/lib/Support/TrieRawHashMap.cpp index 9eeac0bbc5c2c3a..4741f3d4db0490f 100644 --- a/llvm/lib/Support/TrieRawHashMap.cpp +++ b/llvm/lib/Support/TrieRawHashMap.cpp @@ -424,7 +424,7 @@ unsigned ThreadSafeTrieRawHashMapBase::getNumSlotUsed( return 0; unsigned Num = 0; for (unsigned I = 0, E = S->size(); I < E; ++I) - if (auto *E = S->load(I)) + if (S->load(I)) ++Num; return Num; } From ccd73eeab34b31c7c38e9aca05ca4192fb0913b0 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 29 Oct 2024 13:06:55 -0700 Subject: [PATCH 352/425] [LinkerWrapper] Remove in-house handling of LTO (#113715) Summary: This should be the linker's job: if the user creates any bitcode files, then passing `-flto` to the linker for the toolchain should be able to handle it. Right now this path is only used in the case where someone does LTO w/ ld.gold targeting a CPU, so I think we are safe here as that will still be forwarded; for bfd it'll be an error as it would on the host. I think I talked the SYCL team out of using this as well, so I should be good to delete it.
--- .../ClangLinkerWrapper.cpp | 362 +----------------- 1 file changed, 8 insertions(+), 354 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 9fcecaee318a79f..45274b797236c9b 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -140,9 +140,6 @@ static std::list> TempFiles; /// Codegen flags for LTO backend. static codegen::RegisterCodeGenFlags CodeGenFlags; -/// Global flag to indicate that the LTO pipeline threw an error. -static std::atomic LTOError; - using OffloadingImage = OffloadBinary::OffloadingImage; namespace llvm { @@ -293,12 +290,10 @@ Expected findProgram(StringRef Name, ArrayRef Paths) { return *Path; } -/// We will defer LTO to the target's linker if we are not doing JIT and it is -/// supported by the toolchain. bool linkerSupportsLTO(const ArgList &Args) { llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); return Triple.isNVPTX() || Triple.isAMDGPU() || - Args.getLastArgValue(OPT_linker_path_EQ).ends_with("ld.lld"); + Args.getLastArgValue(OPT_linker_path_EQ).ends_with("lld"); } /// Returns the hashed value for a constant string. @@ -528,13 +523,11 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { }; // Forward all of the `--offload-opt` and similar options to the device. 
- if (linkerSupportsLTO(Args)) { - CmdArgs.push_back("-flto"); - for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) - CmdArgs.append( - {"-Xlinker", - Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); - } + CmdArgs.push_back("-flto"); + for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) + CmdArgs.append( + {"-Xlinker", + Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); if (!Triple.isNVPTX()) CmdArgs.push_back("-Wl,--no-undefined"); @@ -655,7 +648,6 @@ void diagnosticHandler(const DiagnosticInfo &DI) { switch (DI.getSeverity()) { case DS_Error: WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n"; - LTOError = true; break; case DS_Warning: WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n"; @@ -669,334 +661,6 @@ void diagnosticHandler(const DiagnosticInfo &DI) { } } -// Get the list of target features from the input file and unify them such that -// if there are multiple +xxx or -xxx features we only keep the last one. -std::vector getTargetFeatures(ArrayRef InputFiles) { - SmallVector Features; - for (const OffloadFile &File : InputFiles) { - for (auto Arg : llvm::split(File.getBinary()->getString("feature"), ",")) - Features.emplace_back(Arg); - } - - // Only add a feature if it hasn't been seen before starting from the end. - std::vector UnifiedFeatures; - DenseSet UsedFeatures; - for (StringRef Feature : llvm::reverse(Features)) { - if (UsedFeatures.insert(Feature.drop_front()).second) - UnifiedFeatures.push_back(Feature.str()); - } - - return UnifiedFeatures; -} - -template > -std::unique_ptr createLTO( - const ArgList &Args, const std::vector &Features, - ModuleHook Hook = [](size_t, const Module &) { return true; }) { - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - // We need to remove AMD's target-id from the processor if present. 
- StringRef TargetID = Args.getLastArgValue(OPT_arch_EQ); - StringRef Arch = clang::getProcessorFromTargetID(Triple, TargetID); - lto::Config Conf; - lto::ThinBackend Backend; - // TODO: Handle index-only thin-LTO - Backend = - lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency()); - - Conf.CPU = Arch.str(); - Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple); - - Conf.RemarksFilename = RemarksFilename; - Conf.RemarksPasses = RemarksPasses; - Conf.RemarksWithHotness = RemarksWithHotness; - Conf.RemarksHotnessThreshold = RemarksHotnessThreshold; - Conf.RemarksFormat = RemarksFormat; - - StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); - Conf.MAttrs = Features; - std::optional CGOptLevelOrNone = - CodeGenOpt::parseLevel(OptLevel[1]); - assert(CGOptLevelOrNone && "Invalid optimization level"); - Conf.CGOptLevel = *CGOptLevelOrNone; - Conf.OptLevel = OptLevel[1] - '0'; - Conf.DefaultTriple = Triple.getTriple(); - - // TODO: Should we complain about combining --opt-level and -passes, as opt - // does? That might be too limiting in clang-linker-wrapper, so for now we - // just warn in the help entry for -passes that the default corresponding - // to --opt-level=O? should be included there. The problem is that - // --opt-level produces effects in clang-linker-wrapper beyond what -passes - // appears to be able to achieve, so rejecting the combination of --opt-level - // and -passes would apparently make it impossible to combine those effects - // with a custom pass pipeline. - Conf.OptPipeline = PassPipeline; - Conf.PassPlugins = PassPlugins; - - LTOError = false; - Conf.DiagHandler = diagnosticHandler; - - Conf.PTO.LoopVectorization = Conf.OptLevel > 1; - Conf.PTO.SLPVectorization = Conf.OptLevel > 1; - - if (SaveTemps) { - std::string TempName = (sys::path::filename(ExecutableName) + "." + - Triple.getTriple() + "." 
+ TargetID) - .str(); - Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) { - std::string File = - !Task ? TempName + ".postlink.bc" - : TempName + "." + std::to_string(Task) + ".postlink.bc"; - error_code EC; - raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - return true; - }; - Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) { - std::string File = - !Task ? TempName + ".postopt.bc" - : TempName + "." + std::to_string(Task) + ".postopt.bc"; - error_code EC; - raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - return true; - }; - } - Conf.PostOptModuleHook = Hook; - Conf.CGFileType = (Triple.isNVPTX() || SaveTemps) - ? CodeGenFileType::AssemblyFile - : CodeGenFileType::ObjectFile; - - // TODO: Handle remark files - Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program); - - return std::make_unique(std::move(Conf), Backend); -} - -// Returns true if \p S is valid as a C language identifier and will be given -// `__start_` and `__stop_` symbols. -bool isValidCIdentifier(StringRef S) { - return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && - llvm::all_of(llvm::drop_begin(S), - [](char C) { return C == '_' || isAlnum(C); }); -} - -Error linkBitcodeFiles(SmallVectorImpl &InputFiles, - SmallVectorImpl &OutputFiles, - const ArgList &Args) { - llvm::TimeTraceScope TimeScope("Link bitcode files"); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - - SmallVector BitcodeInputFiles; - DenseSet StrongResolutions; - DenseSet UsedInRegularObj; - DenseSet UsedInSharedLib; - BumpPtrAllocator Alloc; - StringSaver Saver(Alloc); - - // Search for bitcode files in the input and create an LTO input file. 
If - // it is not a bitcode file, scan its symbol table for symbols we need to - // save. - for (OffloadFile &File : InputFiles) { - MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), ""); - - file_magic Type = identify_magic(Buffer.getBuffer()); - switch (Type) { - case file_magic::bitcode: { - Expected IRSymtabOrErr = readIRSymtab(Buffer); - if (!IRSymtabOrErr) - return IRSymtabOrErr.takeError(); - - // Check for any strong resolutions we need to preserve. - for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { - for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) { - if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() && - !Sym.isUndefined()) - StrongResolutions.insert(Saver.save(Sym.Name)); - } - } - BitcodeInputFiles.emplace_back(std::move(File)); - continue; - } - case file_magic::elf_relocatable: - case file_magic::elf_shared_object: { - Expected> ObjFile = - ObjectFile::createObjectFile(Buffer); - if (!ObjFile) - continue; - - for (SymbolRef Sym : (*ObjFile)->symbols()) { - Expected Name = Sym.getName(); - if (!Name) - return Name.takeError(); - - // Record if we've seen these symbols in any object or shared - // libraries. - if ((*ObjFile)->isRelocatableObject()) - UsedInRegularObj.insert(Saver.save(*Name)); - else - UsedInSharedLib.insert(Saver.save(*Name)); - } - continue; - } - default: - continue; - } - } - - if (BitcodeInputFiles.empty()) - return Error::success(); - - // Remove all the bitcode files that we moved from the original input. - llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); }); - - // LTO Module hook to output bitcode without running the backend. 
- SmallVector BitcodeOutput; - auto OutputBitcode = [&](size_t, const Module &M) { - auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) + - "-jit-" + Triple.getTriple(), - "bc"); - if (!TempFileOrErr) - reportError(TempFileOrErr.takeError()); - - std::error_code EC; - raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - BitcodeOutput.push_back(*TempFileOrErr); - return false; - }; - - // We assume visibility of the whole program if every input file was - // bitcode. - auto Features = getTargetFeatures(BitcodeInputFiles); - auto LTOBackend = Args.hasArg(OPT_embed_bitcode) || - Args.hasArg(OPT_builtin_bitcode_EQ) || - Args.hasArg(OPT_clang_backend) - ? createLTO(Args, Features, OutputBitcode) - : createLTO(Args, Features); - - // We need to resolve the symbols so the LTO backend knows which symbols - // need to be kept or can be internalized. This is a simplified symbol - // resolution scheme to approximate the full resolution a linker would do. - uint64_t Idx = 0; - DenseSet PrevailingSymbols; - for (auto &BitcodeInput : BitcodeInputFiles) { - // Get a semi-unique buffer identifier for Thin-LTO. - StringRef Identifier = Saver.save( - std::to_string(Idx++) + "." + - BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier()); - MemoryBufferRef Buffer = - MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier); - Expected> BitcodeFileOrErr = - llvm::lto::InputFile::create(Buffer); - if (!BitcodeFileOrErr) - return BitcodeFileOrErr.takeError(); - - // Save the input file and the buffer associated with its memory. 
- const auto Symbols = (*BitcodeFileOrErr)->symbols(); - SmallVector Resolutions(Symbols.size()); - size_t Idx = 0; - for (auto &Sym : Symbols) { - lto::SymbolResolution &Res = Resolutions[Idx++]; - - // We will use this as the prevailing symbol definition in LTO unless - // it is undefined or another definition has already been used. - Res.Prevailing = - !Sym.isUndefined() && - !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) && - PrevailingSymbols.insert(Saver.save(Sym.getName())).second; - - // We need LTO to preseve the following global symbols: - // 1) Symbols used in regular objects. - // 2) Sections that will be given a __start/__stop symbol. - // 3) Prevailing symbols that are needed visible to external - // libraries. - Res.VisibleToRegularObj = - UsedInRegularObj.contains(Sym.getName()) || - isValidCIdentifier(Sym.getSectionName()) || - (Res.Prevailing && - (Sym.getVisibility() != GlobalValue::HiddenVisibility && - !Sym.canBeOmittedFromSymbolTable())); - - // Identify symbols that must be exported dynamically and can be - // referenced by other files. - Res.ExportDynamic = - Sym.getVisibility() != GlobalValue::HiddenVisibility && - (UsedInSharedLib.contains(Sym.getName()) || - !Sym.canBeOmittedFromSymbolTable()); - - // The final definition will reside in this linkage unit if the symbol - // is defined and local to the module. This only checks for bitcode - // files, full assertion will require complete symbol resolution. - Res.FinalDefinitionInLinkageUnit = - Sym.getVisibility() != GlobalValue::DefaultVisibility && - (!Sym.isUndefined() && !Sym.isCommon()); - - // We do not support linker redefined symbols (e.g. --wrap) for device - // image linking, so the symbols will not be changed after LTO. - Res.LinkerRedefined = false; - } - - // Add the bitcode file with its resolved symbols to the LTO job. - if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions)) - return Err; - } - - // Run the LTO job to compile the bitcode. 
- size_t MaxTasks = LTOBackend->getMaxTasks(); - SmallVector Files(MaxTasks); - auto AddStream = - [&](size_t Task, - const Twine &ModuleName) -> std::unique_ptr { - int FD = -1; - auto &TempFile = Files[Task]; - StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o"; - std::string TaskStr = Task ? "." + std::to_string(Task) : ""; - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName) + "." + - Triple.getTriple() + "." + Arch + TaskStr, - Extension); - if (!TempFileOrErr) - reportError(TempFileOrErr.takeError()); - TempFile = *TempFileOrErr; - if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD)) - reportError(errorCodeToError(EC)); - return std::make_unique( - std::make_unique(FD, true)); - }; - - if (Error Err = LTOBackend->run(AddStream)) - return Err; - - if (LTOError) - return createStringError("Errors encountered inside the LTO pipeline."); - - // If we are embedding bitcode we only need the intermediate output. - bool SingleOutput = Files.size() == 1; - if (Args.hasArg(OPT_embed_bitcode)) { - if (BitcodeOutput.size() != 1 || !SingleOutput) - return createStringError("Cannot embed bitcode with multiple files."); - OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front())); - return Error::success(); - } - - // Append the new inputs to the device linker input. If the user requested - // an internalizing link we need to pass the bitcode to clang. - for (StringRef File : - Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ) - ? 
BitcodeOutput - : Files) - OutputFiles.push_back(File); - - return Error::success(); -} - Expected writeOffloadFile(const OffloadFile &File) { const OffloadBinary &Binary = *File.getBinary(); @@ -1327,15 +991,8 @@ Expected> linkAndWrapDeviceFiles( if (File.getBinary()->getOffloadKind() != OFK_None) ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind()); - // First link and remove all the input files containing bitcode if - // the target linker does not support it natively. + // Write any remaining device inputs to an output file. SmallVector InputFiles; - if (!linkerSupportsLTO(LinkerArgs)) - if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs)) - return Err; - - // Write any remaining device inputs to an output file for the - // linker. for (const OffloadFile &File : Input) { auto FileNameOrErr = writeOffloadFile(File); if (!FileNameOrErr) @@ -1344,10 +1001,7 @@ Expected> linkAndWrapDeviceFiles( } // Link the remaining device files using the device linker. - auto OutputOrErr = - !Args.hasArg(OPT_embed_bitcode) || linkerSupportsLTO(LinkerArgs) - ? 
linkDevice(InputFiles, LinkerArgs) - : InputFiles.front(); + auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); if (!OutputOrErr) return OutputOrErr.takeError(); From 70af40ba74cf62fdaa3ae1d7db972c138655049f Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 29 Oct 2024 20:14:14 +0000 Subject: [PATCH 353/425] [hwasan] Fix forward '[hwasan] Flush stderr/stdout in tests (#114083)' 3754fc1e9af38951aa00181c0e8110174d3f94fd broke the build because subsequent checks depend on the line numbers https://lab.llvm.org/buildbot/#/builders/174/builds/7534/steps/6/logs/FAIL__HWAddressSanitizer-x86_64__use-after-free_c --- compiler-rt/test/hwasan/TestCases/use-after-free.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/hwasan/TestCases/use-after-free.c b/compiler-rt/test/hwasan/TestCases/use-after-free.c index b4b79875e8111eb..fe4f8b32ea10060 100644 --- a/compiler-rt/test/hwasan/TestCases/use-after-free.c +++ b/compiler-rt/test/hwasan/TestCases/use-after-free.c @@ -32,11 +32,11 @@ int main() { // // CHECK: freed by thread {{.*}} here: // CHECK: #0 {{.*}} in {{.*}}free{{.*}} {{.*}}hwasan_allocation_functions.cpp - // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-19]] + // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-20]] // CHECK: previously allocated by thread {{.*}} here: // CHECK: #0 {{.*}} in {{.*}}malloc{{.*}} {{.*}}hwasan_allocation_functions.cpp - // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-24]] + // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-25]] // CHECK: Memory tags around the buggy address (one tag corresponds to 16 bytes): // CHECK: =>{{.*}}[[MEM_TAG]] // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in main From 8193832fb988e3df1e8e726634783805dca8d9b6 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 29 Oct 2024 13:23:33 -0700 Subject: [PATCH 354/425] [lldb] Search main function with lldb::eFunctionNameTypeFull when getting default file and line. 
(#113980) This is to work around the fact that `SymbolFileNativePDB::FindFunctions` only supports `lldb::eFunctionNameTypeFull` and `lldb::eFunctionNameTypeMethod` now. Since `main`'s full name is the same as its base name (`main`), it's okay to search with `lldb::eFunctionNameTypeFull` when trying to get the default file and line. With this, `lldb/test/Shell/Driver/TestSingleQuote.test` passes on Windows with the NativePDB plugin. --- lldb/source/Core/SourceManager.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index fd5b49946c6a927..27a9edeef4249e0 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -430,7 +430,7 @@ SourceManager::GetDefaultFileAndLine() { false; // Force it to be a debug symbol. function_options.include_inlines = true; executable_ptr->FindFunctions(main_name, CompilerDeclContext(), - lldb::eFunctionNameTypeBase, + lldb::eFunctionNameTypeFull, function_options, sc_list); for (const SymbolContext &sc : sc_list) { if (sc.function) { From a78861fc55d18046989ff4d624a037e9181da170 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 29 Oct 2024 13:34:28 -0700 Subject: [PATCH 355/425] [NvlinkWrapper] Add support for `--undefined` (#113934) Summary: This flag is pretty canonical in ELF linkers; it allows us to force the link job to extract a library if it defines a specific symbol. This is mostly useful for letting us forcibly extract things that don't fit the normal model (i.e. kernels) from static libraries.
--- clang/test/Driver/nvlink-wrapper.c | 13 ++++++++++--- .../clang-nvlink-wrapper/ClangNVLinkWrapper.cpp | 3 +++ clang/tools/clang-nvlink-wrapper/NVLinkOpts.td | 7 +++++-- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/clang/test/Driver/nvlink-wrapper.c b/clang/test/Driver/nvlink-wrapper.c index 2b0993caee42482..79f4a6641732f79 100644 --- a/clang/test/Driver/nvlink-wrapper.c +++ b/clang/test/Driver/nvlink-wrapper.c @@ -21,12 +21,13 @@ int bar() { } #else extern int y; -int __attribute__((visibility("hidden"))) x = 999; +extern int x; int baz() { return y + x; } #endif // Create various inputs to test basic linking and LTO capabilities. Creating a // CUDA binary requires access to the `ptxas` executable, so we just use x64. +// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.o // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DX -o %t-x.o // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DY -o %t-y.o // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DZ -o %t-z.o @@ -36,6 +37,7 @@ int baz() { return y + x; } // RUN: llvm-ar rcs %t-y.a %t-y.o // RUN: llvm-ar rcs %t-z.a %t-z.o // RUN: llvm-ar rcs %t-w.a %t-w.o +// RUN: llvm-ar rcs %t-u.a %t-u.o // // Check that we forward any unrecognized argument to 'nvlink'. @@ -49,11 +51,16 @@ int baz() { return y + x; } // `libx.a` and `liby.a` because extern weak symbols do not extract and `libz.a` // is not used at all. 
// -// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.o %t-y.a %t-z.a %t-w.a \ +// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.a %t-y.a %t-z.a %t-w.a %t.o \ // RUN: -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LINK // LINK: nvlink{{.*}} -arch sm_52 -o a.out [[INPUT:.+]].cubin {{.*}}-x-{{.*}}.cubin{{.*}}-y-{{.*}}.cubin -// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.o +// +// Same as above but we use '--undefined' to forcibly extract 'libz.a' +// +// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.a %t-y.a %t-z.a %t-w.a %t.o \ +// RUN: -u z -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LINK +// UNDEFINED: nvlink{{.*}} -arch sm_52 -o a.out [[INPUT:.+]].cubin {{.*}}-x-{{.*}}.cubin{{.*}}-y-{{.*}}.cubin{{.*}}-z-{{.*}}.cubin // // Check that the LTO interface works and properly preserves symbols used in a diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp index b9767a7a03d0b59..bc191afdca739df 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp @@ -250,6 +250,7 @@ struct Symbol { }; Symbol() : File(), Flags(None), UsedInRegularObj(false) {} + Symbol(Symbol::Flags Flags) : File(), Flags(Flags), UsedInRegularObj(true) {} Symbol(MemoryBufferRef File, const irsymtab::Reader::SymbolRef Sym) : File(File), Flags(0), UsedInRegularObj(false) { @@ -535,6 +536,8 @@ Expected> getInput(const ArgList &Args) { bool Extracted = true; StringMap SymTab; + for (auto &Sym : Args.getAllArgValues(OPT_u)) + SymTab[Sym] = Symbol(Symbol::Undefined); SmallVector> LinkerInput; while (Extracted) { Extracted = false; diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td index a80c5937b429923..6de1a25c14f8be0 100644 --- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td +++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td @@ -43,11 +43,11 @@ def 
plugin : JoinedOrSeparate<["--", "-"], "plugin">, Flags<[HelpHidden, WrapperOnlyOption]>; def arch : Separate<["--", "-"], "arch">, - HelpText<"Specify the 'sm_' name of the target architecture.">; + HelpText<"Specify the 'sm_' name of the target architecture">; def : Joined<["--", "-"], "plugin-opt=mcpu=">, Flags<[HelpHidden, WrapperOnlyOption]>, Alias; -def g : Flag<["-"], "g">, HelpText<"Specify that this was a debug compile.">; +def g : Flag<["-"], "g">, HelpText<"Specify that this was a debug compile">; def debug : Flag<["--"], "debug">, Alias; def lto_emit_llvm : Flag<["--"], "lto-emit-llvm">, Flags<[WrapperOnlyOption]>, @@ -55,6 +55,9 @@ def lto_emit_llvm : Flag<["--"], "lto-emit-llvm">, Flags<[WrapperOnlyOption]>, def lto_emit_asm : Flag<["--"], "lto-emit-asm">, Flags<[WrapperOnlyOption]>, HelpText<"Emit assembly code">; +def u : JoinedOrSeparate<["-"], "u">, HelpText<"Force undefined symbol during linking">; +def undefined : JoinedOrSeparate<["--"], "undefined">, Alias; + def O : Joined<["--", "-"], "plugin-opt=O">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Optimization level for LTO">; From 41baa69a7e2ab3df13334565aa6ccdae1b0113ad Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 29 Oct 2024 13:52:22 -0700 Subject: [PATCH 356/425] [BOLT] Fix warnings (#114116) This patch fixes: bolt/lib/Core/BinaryFunction.cpp:2537:13: error: enumeration value 'OpNegateRAStateWithPC' not handled in switch [-Werror,-Wswitch] bolt/lib/Core/BinaryFunction.cpp:2661:13: error: enumeration value 'OpNegateRAStateWithPC' not handled in switch [-Werror,-Wswitch] bolt/lib/Core/BinaryFunction.cpp:2805:13: error: enumeration value 'OpNegateRAStateWithPC' not handled in switch [-Werror,-Wswitch] --- bolt/lib/Core/BinaryFunction.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index ef3fba37817daa5..c12217d549479bc 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ 
b/bolt/lib/Core/BinaryFunction.cpp @@ -2577,6 +2577,7 @@ struct CFISnapshot { case MCCFIInstruction::OpAdjustCfaOffset: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: + case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); @@ -2715,6 +2716,7 @@ struct CFISnapshotDiff : public CFISnapshot { case MCCFIInstruction::OpAdjustCfaOffset: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: + case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); @@ -2864,6 +2866,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState, case MCCFIInstruction::OpAdjustCfaOffset: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: + case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); From 94e7d9c0bfe517507ea08b00fb00c32fb2837a82 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Oct 2024 13:50:41 -0700 Subject: [PATCH 357/425] [RISCV] Remove Zvk* dependency checks from RISCVISAInfo::checkDependency. The Zvk* extensions now imply Zve32x or Zve64x so it shouldn't be possible to fail these dependency checks. 
--- llvm/lib/TargetParser/RISCVISAInfo.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index caa5a97747ee57b..de5b5c39c9ed271 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -751,17 +751,6 @@ Error RISCVISAInfo::checkDependency() { if (HasZvl && !HasVector) return getExtensionRequiresError("zvl*b", "v' or 'zve*"); - if (!HasVector) - for (auto Ext : - {"zvbb", "zvbc32e", "zvkb", "zvkg", "zvkgs", "zvkned", "zvknha", "zvksed", "zvksh"}) - if (Exts.count(Ext)) - return getExtensionRequiresError(Ext, "v' or 'zve*"); - - if (!Exts.count("zve64x")) - for (auto Ext : {"zvknhb", "zvbc"}) - if (Exts.count(Ext)) - return getExtensionRequiresError(Ext, "v' or 'zve64*"); - if ((HasZcmt || Exts.count("zcmp")) && HasD && (HasC || Exts.count("zcd"))) return getError(Twine("'") + (HasZcmt ? "zcmt" : "zcmp") + "' extension is incompatible with '" + From 83ae171722bea2722afa4efb0558a6d8b8844305 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 29 Oct 2024 21:02:32 +0000 Subject: [PATCH 358/425] [AArch64] Add ComputeNumSignBits for VASHR. (#113957) As with a normal ISD::SRA node, they take the number of sign bits of the incoming value and increase it by the shifted amount. 
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 5 +++++ llvm/test/CodeGen/AArch64/arm64-vshift.ll | 12 ++++++++++++ llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp | 13 +++++++++++++ 3 files changed, 30 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 32ba2866ac81807..31a720ed7b5c77b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2536,6 +2536,11 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode( case AArch64ISD::FCMLTz: // Compares return either 0 or all-ones return VTBits; + case AArch64ISD::VASHR: { + unsigned Tmp = + DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + return std::min(Tmp + Op.getConstantOperandVal(1), VTBits); + } } return 1; diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index 1dfd977186b0e73..7af7c235f9ac16b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -3560,4 +3560,16 @@ entry: ret <4 x i16> %vrshrn_n1 } +define <8 x i16> @signbits_vashr(<8 x i16> %a) { +; CHECK-LABEL: signbits_vashr: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr.8h v0, v0, #8 +; CHECK-NEXT: sshr.8h v0, v0, #9 +; CHECK-NEXT: ret + %b = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> ) + %c = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %b, <8 x i16> ) + %d = ashr <8 x i16> %c, + ret <8 x i16> %d +} + declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index 3df72ec8115b6aa..ffedb2c74220f04 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// 
+#include "../lib/Target/AArch64/AArch64ISelLowering.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/AsmParser/Parser.h" @@ -167,6 +168,18 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_EXTRACT_SUBVECTOR) { EXPECT_EQ(DAG->ComputeNumSignBits(Op, DemandedElts), 7u); } +TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_VASHR) { + SDLoc Loc; + auto VecVT = MVT::v8i8; + auto Shift = DAG->getConstant(4, Loc, MVT::i32); + auto Vec0 = DAG->getConstant(1, Loc, VecVT); + auto Op1 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, Vec0, Shift); + EXPECT_EQ(DAG->ComputeNumSignBits(Op1), 8u); + auto VecA = DAG->getConstant(0xaa, Loc, VecVT); + auto Op2 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, VecA, Shift); + EXPECT_EQ(DAG->ComputeNumSignBits(Op2), 5u); +} + TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) { TargetLowering TL(*TM); From 680901ed8010319843cd81275b845d682f77e27f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 29 Oct 2024 21:04:31 +0000 Subject: [PATCH 359/425] [VPlan] Implement VPHeaderPHIRecipe::computeCost. Fill out computeCost implementations for various header PHI recipes, matching the legacy cost model for now. --- llvm/lib/Transforms/Vectorize/VPlan.h | 22 +++++++++++++++++++ .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 22 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a34e34a0d71f1ec..8d6025c89f72791 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2050,6 +2050,10 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe { /// Generate the phi nodes. void execute(VPTransformState &State) override = 0; + /// Return the cost of this header phi recipe. 
+ InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, @@ -2295,6 +2299,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { void execute(VPTransformState &State) override; + /// Return the cost of this first-order recurrence phi recipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, @@ -3134,6 +3142,13 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { /// canonical, i.e. has the same start and step (of 1) as the canonical IV. bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const; + + /// Return the cost of this VPCanonicalIVPHIRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override { + // For now, match the behavior of the legacy cost model. + return 0; + } }; /// A recipe for generating the active lane mask for the vector loop that is @@ -3196,6 +3211,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe. void execute(VPTransformState &State) override; + /// Return the cost of this VPEVLBasedIVPHIRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override { + // For now, match the behavior of the legacy cost model. + return 0; + } + /// Returns true if the recipe only uses the first lane of operand \p Op. 
bool onlyFirstLaneUsed(const VPValue *Op) const override { assert(is_contained(operands(), Op) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b1e6086398c4df7..de7023167df8990 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1589,6 +1589,11 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput); +} + /// This function adds /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) /// to each vector element of Val. The sequence starts at StartIndex. @@ -3334,6 +3339,23 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { State.set(this, Phi); } +InstructionCost +VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + if (VF.isScalable() && VF.getKnownMinValue() == 1) + return InstructionCost::getInvalid(); + + SmallVector Mask(VF.getKnownMinValue()); + std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1); + Type *VectorTy = + ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); + + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice, + cast(VectorTy), Mask, CostKind, + VF.getKnownMinValue() - 1); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { From 4a96081224b6c0f166760eab0c42ef3dfadd5ed1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 29 Oct 2024 14:12:14 -0700 Subject: [PATCH 360/425] [clang-linker-wrapper] Fix a warning This patch fixes: clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp:642:6: error: unused function 'diagnosticHandler' 
[-Werror,-Wunused-function] --- .../ClangLinkerWrapper.cpp | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 45274b797236c9b..561b73c73ad7df9 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -639,28 +639,6 @@ Expected linkDevice(ArrayRef InputFiles, } } -void diagnosticHandler(const DiagnosticInfo &DI) { - std::string ErrStorage; - raw_string_ostream OS(ErrStorage); - DiagnosticPrinterRawOStream DP(OS); - DI.print(DP); - - switch (DI.getSeverity()) { - case DS_Error: - WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Warning: - WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Note: - WithColor::note(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Remark: - WithColor::remark(errs()) << ErrStorage << "\n"; - break; - } -} - Expected writeOffloadFile(const OffloadFile &File) { const OffloadBinary &Binary = *File.getBinary(); From a325c5359310316e393e7e446373fca645002ecb Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Tue, 29 Oct 2024 14:38:02 -0700 Subject: [PATCH 361/425] [lldb] Fix lldb windows build breakage from https://github.com/llvm/llvm-project/pull/112657. 
LLDB windows build failure: https://lab.llvm.org/buildbot/#/builders/141/builds/3462 --- lldb/unittests/Host/FileActionTest.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lldb/unittests/Host/FileActionTest.cpp b/lldb/unittests/Host/FileActionTest.cpp index 3d2c722552c9c2f..56227cd587e5bbe 100644 --- a/lldb/unittests/Host/FileActionTest.cpp +++ b/lldb/unittests/Host/FileActionTest.cpp @@ -10,6 +10,9 @@ #include "lldb/Host/FileAction.h" #include "gtest/gtest.h" +#if defined(_WIN32) +#include "lldb/Host/windows/PosixApi.h" +#endif using namespace lldb_private; From 5cfb07a5d067f7729a1578c7272fb314a89c8596 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 29 Oct 2024 14:46:51 -0700 Subject: [PATCH 362/425] Revert "[ADT] Use std::string_view inside StringRef (#113775)" (#114133) This patch reverts commit 89b5d88fb81362b4fb2f833790aa40b7eaa186da. Some sanitizer failures have been reported, indicating that StringRef and std::string_view handle data == nullptr differently. Also, they support different values for the max size (size_t vs. ptrdiff_t). Thanks go to Jorge Gorbe Moya for reporting these. --- llvm/include/llvm/ADT/StringRef.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 0dcd4d90086eff5..5b525c8e56ecc9f 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -60,7 +60,11 @@ namespace llvm { using const_reverse_iterator = std::reverse_iterator; private: - std::string_view View; + /// The start of the string, in an external buffer. + const char *Data = nullptr; + + /// The length of the string. + size_t Length = 0; // Workaround memcmp issue with null pointers (undefined behavior) // by providing a specialized version @@ -82,26 +86,28 @@ namespace llvm { /// Construct a string ref from a cstring. /*implicit*/ constexpr StringRef(const char *Str LLVM_LIFETIME_BOUND) - : View(Str, Str ? 
+ : Data(Str), Length(Str ? // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 - __builtin_strlen(Str) + __builtin_strlen(Str) #else - std::char_traits::length(Str) + std::char_traits::length(Str) #endif - : 0) { + : 0) { } /// Construct a string ref from a pointer and length. /*implicit*/ constexpr StringRef(const char *data LLVM_LIFETIME_BOUND, size_t length) - : View(data, length) {} + : Data(data), Length(length) {} /// Construct a string ref from an std::string. - /*implicit*/ StringRef(const std::string &Str) : View(Str) {} + /*implicit*/ StringRef(const std::string &Str) + : Data(Str.data()), Length(Str.length()) {} /// Construct a string ref from an std::string_view. - /*implicit*/ constexpr StringRef(std::string_view Str) : View(Str) {} + /*implicit*/ constexpr StringRef(std::string_view Str) + : Data(Str.data()), Length(Str.size()) {} /// @} /// @name Iterators @@ -135,13 +141,13 @@ namespace llvm { /// data - Get a pointer to the start of the string (which may not be null /// terminated). - [[nodiscard]] constexpr const char *data() const { return View.data(); } + [[nodiscard]] constexpr const char *data() const { return Data; } /// empty - Check if the string is empty. [[nodiscard]] constexpr bool empty() const { return size() == 0; } /// size - Get the string size. - [[nodiscard]] constexpr size_t size() const { return View.size(); } + [[nodiscard]] constexpr size_t size() const { return Length; } /// front - Get the first character in the string. [[nodiscard]] char front() const { From 8b55162e195783dd27e1c69fb4d97971ef76725b Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 29 Oct 2024 23:56:10 +0200 Subject: [PATCH 363/425] [RISCV] Add cost model tests for scalable FP reductions. NFC There are already some in reduce-scalable-fp.ll but this makes it a bit easier to see the difference alongside their fixed-length counterparts. 
--- .../Analysis/CostModel/RISCV/reduce-fadd.ll | 181 ++++++++++++++---- .../Analysis/CostModel/RISCV/reduce-fmul.ll | 132 +++++++++++++ 2 files changed, 276 insertions(+), 37 deletions(-) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll index 588d852d7f26e20..196e7376677a54f 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll @@ -13,6 +13,12 @@ define void @reduce_fadd_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_bfloat' @@ -24,6 +30,12 @@ define void 
@reduce_fadd_bfloat() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -34,6 +46,12 @@ define void @reduce_fadd_bfloat() { %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef) %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef) %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef) + %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0.0, undef) + %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0.0, undef) + %NXV4 = call fast bfloat 
@llvm.vector.reduce.fadd.nxv4bf16(bfloat 0.0, undef) + %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, undef) + %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, undef) + %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, undef) ret void } @@ -47,6 +65,12 @@ define void @reduce_fadd_half() { ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half' @@ -58,6 +82,12 @@ define void 
@reduce_fadd_half() { ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_half' @@ -69,6 +99,12 @@ define void @reduce_fadd_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 
0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) @@ -79,6 +115,12 @@ define void @reduce_fadd_half() { %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) + %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, undef) + %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, undef) + %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, undef) + %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, undef) + %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, undef) + %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, undef) ret void } @@ -92,6 +134,11 @@ define void @reduce_fadd_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = 
call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_float' @@ -103,6 +150,11 @@ define void @reduce_fadd_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = 
call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) @@ -113,6 +165,11 @@ define void @reduce_fadd_float() { %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) + %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, undef) + %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, undef) + %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, undef) + %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, undef) + %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, undef) ret void } @@ -126,6 +183,10 @@ define void @reduce_fadd_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; 
FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_double' @@ -137,6 +198,10 @@ define void @reduce_fadd_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; 
SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) @@ -147,11 +212,15 @@ define void @reduce_fadd_double() { %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) + %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, undef) + %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, undef) + %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, undef) + %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, undef) ret void } -define void @reduce_oredered_fadd_bfloat() { -; FP-REDUCE-LABEL: 'reduce_oredered_fadd_bfloat' +define void @reduce_ordered_fadd_bfloat() { +; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) @@ -160,9 +229,15 @@ define void @reduce_oredered_fadd_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call bfloat 
@llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SIZE-LABEL: 'reduce_oredered_fadd_bfloat' +; SIZE-LABEL: 'reduce_ordered_fadd_bfloat' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) @@ -171,6 +246,12 @@ define void @reduce_oredered_fadd_bfloat() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call bfloat 
@llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -181,11 +262,17 @@ define void @reduce_oredered_fadd_bfloat() { %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef) %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef) %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef) + %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0.0, undef) + %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0.0, undef) + %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0.0, 
undef) + %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, undef) + %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, undef) + %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, undef) ret void } -define void @reduce_oredered_fadd_half() { -; FP-REDUCE-LABEL: 'reduce_oredered_fadd_half' +define void @reduce_ordered_fadd_half() { +; FP-REDUCE-LABEL: 'reduce_ordered_fadd_half' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) @@ -194,9 +281,15 @@ define void @reduce_oredered_fadd_half() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half 
@llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SIZE-LABEL: 'reduce_oredered_fadd_half' +; SIZE-LABEL: 'reduce_ordered_fadd_half' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) @@ -205,6 +298,12 @@ define void @reduce_oredered_fadd_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for 
instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) @@ -215,11 +314,17 @@ define void @reduce_oredered_fadd_half() { %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) + %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, undef) + %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, undef) + %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, undef) + %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, undef) + %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, undef) + %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, undef) ret void } -define void @reduce_oredered_fadd_float() { -; FP-REDUCE-LABEL: 'reduce_oredered_fadd_float' +define void @reduce_ordered_fadd_float() { +; FP-REDUCE-LABEL: 'reduce_ordered_fadd_float' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) @@ -228,9 +333,14 @@ define void 
@reduce_oredered_fadd_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SIZE-LABEL: 'reduce_oredered_fadd_float' +; SIZE-LABEL: 'reduce_ordered_fadd_float' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) @@ -239,6 +349,11 @@ define 
void @reduce_oredered_fadd_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) @@ -249,11 +364,16 @@ define void @reduce_oredered_fadd_float() { %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) + %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, undef) + %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, undef) + %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, undef) + %NXV8 = call float 
@llvm.vector.reduce.fadd.nxv8f32(float 0.0, undef) + %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, undef) ret void } -define void @reduce_oredered_fadd_double() { -; FP-REDUCE-LABEL: 'reduce_oredered_fadd_double' +define void @reduce_ordered_fadd_double() { +; FP-REDUCE-LABEL: 'reduce_ordered_fadd_double' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) @@ -262,9 +382,13 @@ define void @reduce_oredered_fadd_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double 
@llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SIZE-LABEL: 'reduce_oredered_fadd_double' +; SIZE-LABEL: 'reduce_ordered_fadd_double' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) @@ -273,6 +397,10 @@ define void @reduce_oredered_fadd_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call double 
@llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) @@ -283,30 +411,9 @@ define void @reduce_oredered_fadd_double() { %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) + %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, undef) + %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, undef) + %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, undef) + %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, undef) ret void } - -declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) -declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>) -declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) -declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>) -declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>) -declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>) -declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>) -declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>) -declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>) -declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) -declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) -declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) -declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>) -declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>) -declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>) -declare float @llvm.vector.reduce.fadd.v128f32(float, <128 x float>) -declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>) -declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) -declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) -declare double 
@llvm.vector.reduce.fadd.v8f64(double, <8 x double>) -declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) -declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>) -declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>) -declare double @llvm.vector.reduce.fadd.v128f64(double, <128 x double>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll index 162562c7b89310d..211bcb1343eea40 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll @@ -13,6 +13,12 @@ define void @reduce_fmul_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat 
@llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_bfloat' @@ -24,6 +30,12 @@ define void @reduce_fmul_bfloat() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -34,6 +46,12 @@ define void @reduce_fmul_bfloat() { %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef) %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef) %V128 = call fast bfloat 
@llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef) + %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, undef) + %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, undef) + %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, undef) + %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, undef) + %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, undef) + %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, undef) ret void } @@ -47,6 +65,12 @@ define void @reduce_fmul_half() { ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; 
FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half' @@ -58,6 +82,12 @@ define void @reduce_fmul_half() { ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_half' @@ -69,6 +99,12 @@ define void @reduce_fmul_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for 
instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef) @@ -79,6 +115,12 @@ define void @reduce_fmul_half() { %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef) %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef) %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef) + %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, undef) + %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, undef) + %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, undef) + %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, undef) + %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, undef) + %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, undef) ret void } @@ 
-92,6 +134,11 @@ define void @reduce_fmul_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 451 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_float' @@ -103,6 +150,11 @@ define void @reduce_fmul_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: 
Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef) @@ -113,6 +165,11 @@ define void @reduce_fmul_float() { %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef) %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef) %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef) + %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, undef) + %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, undef) + %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, undef) + %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, undef) + %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, undef) ret void } @@ -126,6 +183,10 @@ define void @reduce_fmul_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost 
Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_double' @@ -137,6 +198,10 @@ define void @reduce_fmul_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, 
undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef) @@ -147,6 +212,10 @@ define void @reduce_fmul_double() { %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef) %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef) %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef) + %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, undef) + %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, undef) + %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, undef) + %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, undef) ret void } @@ -160,6 +229,12 @@ define void @reduce_ordered_fmul_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for 
instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat' @@ -171,6 +246,12 @@ define void @reduce_ordered_fmul_bfloat() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -181,6 +262,12 @@ define void @reduce_ordered_fmul_bfloat() { %v32 = call bfloat 
@llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef) %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef) %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef) + %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, undef) + %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, undef) + %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, undef) + %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, undef) + %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, undef) + %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, undef) ret void } @@ -194,6 +281,12 @@ define void @reduce_ordered_fmul_half() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half 
@llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_half' @@ -205,6 +298,12 @@ define void @reduce_ordered_fmul_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef) @@ -215,6 +314,12 @@ define void @reduce_ordered_fmul_half() { %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef) %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef) %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef) + %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 
0.0, undef) + %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, undef) + %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, undef) + %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, undef) + %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, undef) + %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, undef) ret void } @@ -228,6 +333,11 @@ define void @reduce_ordered_fmul_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_float' @@ -239,6 +349,11 @@ define void @reduce_ordered_fmul_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> 
undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef) @@ -249,6 +364,11 @@ define void @reduce_ordered_fmul_float() { %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef) %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef) %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef) + %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, undef) + %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, undef) + %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, undef) + %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, undef) + %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, undef) ret void } @@ -262,6 +382,10 @@ define void @reduce_ordered_fmul_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 126 for instruction: 
%v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_double' @@ -273,6 +397,10 @@ define void @reduce_ordered_fmul_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost 
Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef) @@ -283,5 +411,9 @@ define void @reduce_ordered_fmul_double() { %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef) %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef) %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef) + %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, undef) + %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, undef) + %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, undef) + %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, undef) ret void } From ca998b071eba1c92bf8535964183c7c4c3b258c3 Mon Sep 17 00:00:00 2001 From: vporpo Date: Tue, 29 Oct 2024 15:37:03 -0700 Subject: [PATCH 364/425] [SandboxVec][Legality] Check wrap flags (#113975) --- .../Vectorize/SandboxVectorizer/Legality.h | 3 +++ .../Vectorize/SandboxVectorizer/Legality.cpp | 15 +++++++++++++++ .../Vectorize/SandboxVectorizer/LegalityTest.cpp | 16 +++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index 49dcec26dbc5599..77ba5cd7f002e91 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -32,6 +32,7 @@ enum class ResultReason { DiffOpcodes, DiffTypes, DiffMathFlags, + DiffWrapFlags, }; #ifndef NDEBUG @@ -56,6 +57,8 @@ struct ToStr { return "DiffTypes"; 
case ResultReason::DiffMathFlags: return "DiffMathFlags"; + case ResultReason::DiffWrapFlags: + return "DiffWrapFlags"; } llvm_unreachable("Unknown ResultReason enum"); } diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index 346d8a90589f555..1cc6356300e492b 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -55,6 +55,21 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( return ResultReason::DiffMathFlags; } + // TODO: Allow vectorization by using common flags. + // For now Pack if they don't have the same wrap flags. + bool CanHaveWrapFlags = + isa(I0) || isa(I0); + if (CanHaveWrapFlags) { + bool NUW0 = I0->hasNoUnsignedWrap(); + bool NSW0 = I0->hasNoSignedWrap(); + if (any_of(drop_begin(Bndl), [NUW0, NSW0](auto *V) { + return cast(V)->hasNoUnsignedWrap() != NUW0 || + cast(V)->hasNoSignedWrap() != NSW0; + })) { + return ResultReason::DiffWrapFlags; + } + } + // TODO: Missing checks return std::nullopt; diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index aaa8e96de6d171c..50b78f6f48afdf7 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -29,7 +29,7 @@ struct LegalityTest : public testing::Test { TEST_F(LegalityTest, Legality) { parseIR(C, R"IR( -define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float %farg0, float %farg1) { +define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float %farg0, float %farg1, i64 %v0, i64 %v1) { %gep0 = getelementptr float, ptr %ptr, i32 0 %gep1 = getelementptr float, ptr %ptr, i32 1 %gep3 = getelementptr float, ptr %ptr, i32 3 @@ -42,6 +42,8 @@ define void @foo(ptr %ptr, <2 x float> 
%vec2, <3 x float> %vec3, i8 %arg, float store i8 %arg, ptr %gep1 %fadd0 = fadd float %farg0, %farg0 %fadd1 = fadd fast float %farg1, %farg1 + %trunc0 = trunc nuw nsw i64 %v0 to i8 + %trunc1 = trunc nsw i64 %v1 to i8 ret void } )IR"); @@ -62,6 +64,8 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float auto *StI8 = cast(&*It++); auto *FAdd0 = cast(&*It++); auto *FAdd1 = cast(&*It++); + auto *Trunc0 = cast(&*It++); + auto *Trunc1 = cast(&*It++); sandboxir::LegalityAnalysis Legality; const auto &Result = Legality.canVectorize({St0, St1}); @@ -98,6 +102,13 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float EXPECT_EQ(cast(Result).getReason(), sandboxir::ResultReason::DiffMathFlags); } + { + // Check DiffWrapFlags + const auto &Result = Legality.canVectorize({Trunc0, Trunc1}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::DiffWrapFlags); + } } #ifndef NDEBUG @@ -124,5 +135,8 @@ TEST_F(LegalityTest, LegalityResultDump) { EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffMathFlags), "Pack Reason: DiffMathFlags")); + EXPECT_TRUE(Matches(Legality.createLegalityResult( + sandboxir::ResultReason::DiffWrapFlags), + "Pack Reason: DiffWrapFlags")); } #endif // NDEBUG From 2c5eea0e88a6ef6bf932d90c67aaec2bcc59d340 Mon Sep 17 00:00:00 2001 From: Kunwar Grover Date: Tue, 29 Oct 2024 22:47:44 +0000 Subject: [PATCH 365/425] [mlir][Vector] Fix vector.insert folder for scalar to 0-d inserts (#113828) The current vector.insert folder tries to replace a scalar with a 0-rank vector. This patch fixes this crash by not folding unless the types of the result and replacement are the same. 
--- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 8 ++-- mlir/test/Dialect/Vector/canonicalize.mlir | 48 +++++++++++++++++----- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index d71a236f62f454d..1853ae04f45d90c 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -2951,11 +2951,11 @@ void InsertOp::getCanonicalizationPatterns(RewritePatternSet &results, InsertOpConstantFolder>(context); } -// Eliminates insert operations that produce values identical to their source -// value. This happens when the source and destination vectors have identical -// sizes. OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) { - if (getNumIndices() == 0) + // Fold "vector.insert %v, %dest [] : vector<2x2xf32> from vector<2x2xf32>" to + // %v. Note: Do not fold "vector.insert %v, %dest [] : f32 into vector" + // (type mismatch). + if (getNumIndices() == 0 && getSourceType() == getType()) return getSource(); return {}; } diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 6d6bc199e601c0a..c963460e7259fb5 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -800,6 +800,43 @@ func.func @fold_extract_shapecast_to_shapecast(%arg0 : vector<3x4xf32>) -> vecto // ----- +// CHECK-LABEL: func @extract_no_fold_scalar_to_0d( +// CHECK-SAME: %[[v:.*]]: vector) +// CHECK: %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector +// CHECK: return %[[extract]] +func.func @extract_no_fold_scalar_to_0d(%v: vector) -> f32 { + %0 = vector.extract %v[] : f32 from vector + return %0 : f32 +} + +// ----- + +// CHECK-LABEL: func @insert_fold_same_rank( +// CHECK-SAME: %[[v:.*]]: vector<2x2xf32>) +// CHECK: %[[CST:.+]] = arith.constant +// CHECK-SAME: : vector<2x2xf32> +// CHECK-NOT: vector.insert +// CHECK: return %[[CST]] +func.func 
@insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> { + %cst = arith.constant dense<0.000000e+00> : vector<2x2xf32> + %0 = vector.insert %cst, %v [] : vector<2x2xf32> into vector<2x2xf32> + return %0 : vector<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @insert_no_fold_scalar_to_0d( +// CHECK-SAME: %[[v:.*]]: vector) +// CHECK: %[[extract:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector +// CHECK: return %[[extract]] +func.func @insert_no_fold_scalar_to_0d(%v: vector) -> vector { + %cst = arith.constant 0.000000e+00 : f32 + %0 = vector.insert %cst, %v [] : f32 into vector + return %0 : vector +} + +// ----- + // CHECK-LABEL: dont_fold_expand_collapse // CHECK: %[[A:.*]] = vector.shape_cast %{{.*}} : vector<1x1x64xf32> to vector<1x1x8x8xf32> // CHECK: %[[B:.*]] = vector.shape_cast %{{.*}} : vector<1x1x8x8xf32> to vector<8x8xf32> @@ -2606,17 +2643,6 @@ func.func @rank_1_shuffle_to_interleave(%arg0: vector<6xi32>, %arg1: vector<6xi3 // ----- -// CHECK-LABEL: func @extract_from_0d_regression( -// CHECK-SAME: %[[v:.*]]: vector) -// CHECK: %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector -// CHECK: return %[[extract]] -func.func @extract_from_0d_regression(%v: vector) -> f32 { - %0 = vector.extract %v[] : f32 from vector - return %0 : f32 -} - -// ----- - // CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression( // CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: vector, %[[c:.*]]: vector<2xf32>) func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) { From d90a0d1d986e12c4a6ff2eeffe29cedc34e6e2ab Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Tue, 29 Oct 2024 16:00:08 -0700 Subject: [PATCH 366/425] Remove spurious includes from sinpif_test.cpp MPFR functionality is provided by the MPFRWrapper instead, and the direct "mpfr.h" inclusion is not needed - this test doesn't rely on it (similar to its sibling 
libc/test/src/math/exhaustive/cospif_test.cpp that doesn't have it). --- libc/test/src/math/exhaustive/sinpif_test.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/libc/test/src/math/exhaustive/sinpif_test.cpp b/libc/test/src/math/exhaustive/sinpif_test.cpp index 8bc1d81eb7e3d26..81abac0b73f27a0 100644 --- a/libc/test/src/math/exhaustive/sinpif_test.cpp +++ b/libc/test/src/math/exhaustive/sinpif_test.cpp @@ -7,10 +7,8 @@ //===----------------------------------------------------------------------===// #include "exhaustive_test.h" -#include "mpfr.h" #include "src/math/sinpif.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include namespace mpfr = LIBC_NAMESPACE::testing::mpfr; From f71ea0e72e2419691e3c67bdbbe338d314ee77c0 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Wed, 30 Oct 2024 07:16:03 +0800 Subject: [PATCH 367/425] [libc++][test] Augment `test_alloc` in `deallocate_size.pass.cpp` (#113638) Making it meet the allocator requirements since C++11. Fixes #113609. This PR doesn't make it meet the C++03 allocator requirements, because that would make the type too verbose and libc++ has backported many C++11 features to the C++03 mode. Drive-by: Removes the `TEST_CONSTEXPR_CXX14` on `allocate`/`deallocate` which is never in effect (and causes IFNDR-ness before C++23), since these functions modify the namespace-scoped variable `allocated_`.
--- .../string.capacity/deallocate_size.pass.cpp | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp index 1203b2f3ec18f92..00f9e2b8467837d 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp @@ -34,15 +34,32 @@ struct test_alloc { typedef test_alloc other; }; - TEST_CONSTEXPR_CXX14 pointer allocate(size_type n, const void* = nullptr) { + TEST_CONSTEXPR test_alloc() TEST_NOEXCEPT {} + + template + TEST_CONSTEXPR test_alloc(const test_alloc&) TEST_NOEXCEPT {} + + pointer allocate(size_type n, const void* = nullptr) { allocated_ += n; return std::allocator().allocate(n); } - TEST_CONSTEXPR_CXX14 void deallocate(pointer p, size_type s) { + void deallocate(pointer p, size_type s) { allocated_ -= s; std::allocator().deallocate(p, s); } + + template + friend TEST_CONSTEXPR bool operator==(const test_alloc&, const test_alloc&) TEST_NOEXCEPT { + return true; + } + +#if TEST_STD_VER < 20 + template + friend TEST_CONSTEXPR bool operator!=(const test_alloc&, const test_alloc&) TEST_NOEXCEPT { + return false; + } +#endif }; template From 0f8dbb2fac532e37a9859d52982f0e8994305a11 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Wed, 30 Oct 2024 07:16:40 +0800 Subject: [PATCH 368/425] [libc++] Constrain additional overloads of `pow` for `complex` harder (#110235) Fixes #109858. The changes in #81379 broke some 3rd party library code that expected usability of `std::complex`. Although such code isn't portable per [complex.numbers.general]/2, it might be better to make these additional overloads not to interfere overload resolution too much. 
--------- Co-authored-by: Louis Dionne --- libcxx/include/complex | 6 +- .../complex.number/cmplx.over.pow.pass.cpp | 84 +++++++++++++++++++ 2 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp diff --git a/libcxx/include/complex b/libcxx/include/complex index 4030d96b003d568..15e42800fbfa0a6 100644 --- a/libcxx/include/complex +++ b/libcxx/include/complex @@ -1097,20 +1097,20 @@ inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> pow(const complex<_Tp>& __x, const com return std::exp(__y * std::log(__x)); } -template +template ::value && is_floating_point<_Up>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI complex::type> pow(const complex<_Tp>& __x, const complex<_Up>& __y) { typedef complex::type> result_type; return std::pow(result_type(__x), result_type(__y)); } -template ::value, int> = 0> +template ::value && is_arithmetic<_Up>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI complex::type> pow(const complex<_Tp>& __x, const _Up& __y) { typedef complex::type> result_type; return std::pow(result_type(__x), result_type(__y)); } -template ::value, int> = 0> +template ::value && is_floating_point<_Up>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI complex::type> pow(const _Tp& __x, const complex<_Up>& __y) { typedef complex::type> result_type; return std::pow(result_type(__x), result_type(__y)); diff --git a/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp b/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp new file mode 100644 index 000000000000000..1c790c283e43876 --- /dev/null +++ b/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp @@ -0,0 +1,84 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template complex<__promote::type> pow(const complex&, const U&); +// template complex<__promote::type> pow(const complex&, const complex&); +// template complex<__promote::type> pow(const T&, const complex&); + +// Test that these additional overloads are free from catching std::complex, +// which is expected by several 3rd party libraries, see https://github.com/llvm/llvm-project/issues/109858. +// +// Note that we reserve the right to break this in the future if we have a reason to, but for the time being, +// make sure we don't break this property unintentionally. +#include +#include +#include +#include + +#include "test_macros.h" + +namespace usr { +struct usr_tag {}; + +template +typename std::enable_if<(std::is_same::value && std::is_floating_point::value) || + (std::is_floating_point::value && std::is_same::value), + int>::type +pow(const T&, const std::complex&) { + return std::is_same::value ? 0 : 1; +} + +template +typename std::enable_if<(std::is_same::value && std::is_floating_point::value) || + (std::is_floating_point::value && std::is_same::value), + int>::type +pow(const std::complex&, const U&) { + return std::is_same::value ? 2 : 3; +} + +template +typename std::enable_if<(std::is_same::value && std::is_floating_point::value) || + (std::is_floating_point::value && std::is_same::value), + int>::type +pow(const std::complex&, const std::complex&) { + return std::is_same::value ? 
4 : 5; +} +} // namespace usr + +int main(int, char**) { + using std::pow; + using usr::pow; + + usr::usr_tag tag; + const std::complex ctag; + + assert(pow(tag, std::complex(1.0f)) == 0); + assert(pow(std::complex(1.0f), tag) == 2); + assert(pow(tag, std::complex(1.0)) == 0); + assert(pow(std::complex(1.0), tag) == 2); + assert(pow(tag, std::complex(1.0l)) == 0); + assert(pow(std::complex(1.0l), tag) == 2); + + assert(pow(1.0f, ctag) == 1); + assert(pow(ctag, 1.0f) == 3); + assert(pow(1.0, ctag) == 1); + assert(pow(ctag, 1.0) == 3); + assert(pow(1.0l, ctag) == 1); + assert(pow(ctag, 1.0l) == 3); + + assert(pow(ctag, std::complex(1.0f)) == 4); + assert(pow(std::complex(1.0f), ctag) == 5); + assert(pow(ctag, std::complex(1.0)) == 4); + assert(pow(std::complex(1.0), ctag) == 5); + assert(pow(ctag, std::complex(1.0l)) == 4); + assert(pow(std::complex(1.0l), ctag) == 5); + + return 0; +} From 75b37c3191254d0c418058cb94c3a7922b7ba71e Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 29 Oct 2024 16:41:01 -0700 Subject: [PATCH 369/425] [DWARF] Fix arity of DW_OP_bra (#114136) Found by my proof-of-concept DWARF expression evaluator fuzzer. 
--- lldb/unittests/Expression/DWARFExpressionTest.cpp | 3 +++ llvm/include/llvm/BinaryFormat/Dwarf.def | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index f9e0605fce29d61..fdc9bfae1876c51 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -181,6 +181,9 @@ TEST(DWARFExpression, DW_OP_bra) { }), // clang-format on llvm::HasValue(0x42)); + + EXPECT_THAT_ERROR(Evaluate({DW_OP_bra, 0x01, 0x00}).takeError(), + llvm::Failed()); } TEST(DWARFExpression, DW_OP_convert) { diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 9336f2a454ae47b..0cbbbe823c06b50 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -728,7 +728,7 @@ HANDLE_DW_OP(0x24, shl, 0, 2, 2, DWARF) HANDLE_DW_OP(0x25, shr, 0, 2, 2, DWARF) HANDLE_DW_OP(0x26, shra, 0, 2, 2, DWARF) HANDLE_DW_OP(0x27, xor, 0, 2, 2, DWARF) -HANDLE_DW_OP(0x28, bra, 1, 0, 2, DWARF) +HANDLE_DW_OP(0x28, bra, 1, 1, 2, DWARF) HANDLE_DW_OP(0x29, eq, 0, 2, 2, DWARF) HANDLE_DW_OP(0x2a, ge, 0, 2, 2, DWARF) HANDLE_DW_OP(0x2b, gt, 0, 2, 2, DWARF) From 50dd9225f8b33a924970039772faeac03e0a5716 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 29 Oct 2024 23:46:54 +0000 Subject: [PATCH 370/425] Revert "[asan] Flush stderr in test (#114084)" This reverts commit e205929399d9ee4782b2d8ef1b659f918bdfe7c2. 
Reason: did not solve the QEMU bot issues (https://lab.llvm.org/buildbot/#/builders/139/builds/5552/steps/30/logs/stdio) and it shouldn't have been necessary anyway (https://github.com/llvm/llvm-project/pull/114084#issuecomment-2445513320) --- compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp b/compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp index dfeb8ad5c7b53fa..87be90014d56e82 100644 --- a/compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp +++ b/compiler-rt/test/asan/TestCases/Posix/ignore_free_hook.cpp @@ -26,17 +26,14 @@ bool ignore_free = false; extern "C" { WEAK_ON_APPLE void __sanitizer_free_hook(const volatile void *ptr) { - if (ptr == glob_ptr) { + if (ptr == glob_ptr) fprintf(stderr, "Free Hook\n"); - fflush(stderr); - } } WEAK_ON_APPLE int __sanitizer_ignore_free_hook(const volatile void *ptr) { if (ptr != glob_ptr) return 0; fprintf(stderr, ignore_free ? 
"Free Ignored\n" : "Free Respected\n"); - fflush(stderr); return ignore_free; } } // extern "C" From 0fa2fb3ed0bc726e5dcf8258bf764aacd1c2e6dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 29 Oct 2024 17:00:41 -0700 Subject: [PATCH 371/425] [flang][cuda] Add conversion pattern for cuf.kernel_launch op (#114129) --- .../Optimizer/Transforms/CUFOpConversion.cpp | 70 ++++++++++++++++++- flang/test/Fir/CUDA/cuda-launch.fir | 64 +++++++++++++++++ 2 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 flang/test/Fir/CUDA/cuda-launch.fir diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 14cc1cb508cfc01..fe125db7b4061ec 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -15,6 +15,7 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" +#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/allocatable.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" @@ -620,6 +621,69 @@ struct CufDataTransferOpConversion const mlir::SymbolTable &symtab; }; +struct CUFLaunchOpConversion + : public mlir::OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + CUFLaunchOpConversion(mlir::MLIRContext *context, + const mlir::SymbolTable &symTab) + : OpRewritePattern(context), symTab{symTab} {} + + mlir::LogicalResult + matchAndRewrite(cuf::KernelLaunchOp op, + mlir::PatternRewriter &rewriter) const override { + mlir::Location loc = op.getLoc(); + auto idxTy = mlir::IndexType::get(op.getContext()); + auto zero = rewriter.create( + loc, rewriter.getIntegerType(32), rewriter.getI32IntegerAttr(0)); + auto gridSizeX = + 
rewriter.create(loc, idxTy, op.getGridX()); + auto gridSizeY = + rewriter.create(loc, idxTy, op.getGridY()); + auto gridSizeZ = + rewriter.create(loc, idxTy, op.getGridZ()); + auto blockSizeX = + rewriter.create(loc, idxTy, op.getBlockX()); + auto blockSizeY = + rewriter.create(loc, idxTy, op.getBlockY()); + auto blockSizeZ = + rewriter.create(loc, idxTy, op.getBlockZ()); + auto kernelName = mlir::SymbolRefAttr::get( + rewriter.getStringAttr(cudaDeviceModuleName), + {mlir::SymbolRefAttr::get( + rewriter.getContext(), + op.getCallee().getLeafReference().getValue())}); + mlir::Value clusterDimX, clusterDimY, clusterDimZ; + if (auto funcOp = symTab.lookup( + op.getCallee().getLeafReference())) { + if (auto clusterDimsAttr = funcOp->getAttrOfType( + cuf::getClusterDimsAttrName())) { + clusterDimX = rewriter.create( + loc, clusterDimsAttr.getX().getInt()); + clusterDimY = rewriter.create( + loc, clusterDimsAttr.getY().getInt()); + clusterDimZ = rewriter.create( + loc, clusterDimsAttr.getZ().getInt()); + } + } + auto gpuLaunchOp = rewriter.create( + loc, kernelName, mlir::gpu::KernelDim3{gridSizeX, gridSizeY, gridSizeZ}, + mlir::gpu::KernelDim3{blockSizeX, blockSizeY, blockSizeZ}, zero, + op.getArgs()); + if (clusterDimX && clusterDimY && clusterDimZ) { + gpuLaunchOp.getClusterSizeXMutable().assign(clusterDimX); + gpuLaunchOp.getClusterSizeYMutable().assign(clusterDimY); + gpuLaunchOp.getClusterSizeZMutable().assign(clusterDimZ); + } + rewriter.replaceOp(op, gpuLaunchOp); + return mlir::success(); + } + +private: + const mlir::SymbolTable &symTab; +}; + class CUFOpConversion : public fir::impl::CUFOpConversionBase { public: void runOnOperation() override { @@ -637,7 +701,8 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase { fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/false); fir::LLVMTypeConverter typeConverter(module, /*applyTBAA=*/false, /*forceUnifiedTBAATree=*/false, *dl); - target.addLegalDialect(); + target.addLegalDialect(); 
cuf::populateCUFToFIRConversionPatterns(typeConverter, *dl, symtab, patterns); if (mlir::failed(mlir::applyPartialConversion(getOperation(), target, @@ -656,5 +721,6 @@ void cuf::populateCUFToFIRConversionPatterns( patterns.insert(patterns.getContext(), &dl, &converter); patterns.insert(patterns.getContext()); - patterns.insert(patterns.getContext(), symtab); + patterns.insert( + patterns.getContext(), symtab); } diff --git a/flang/test/Fir/CUDA/cuda-launch.fir b/flang/test/Fir/CUDA/cuda-launch.fir new file mode 100644 index 000000000000000..f11bcbdb7fce55b --- /dev/null +++ b/flang/test/Fir/CUDA/cuda-launch.fir @@ -0,0 +1,64 @@ +// RUN: fir-opt --split-input-file --cuf-convert %s | FileCheck %s + + +module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + gpu.module @cuda_device_mod { + gpu.func @_QPsub_device1() kernel { + cf.br ^bb1 + ^bb1: // pred: ^bb0 + gpu.return + } + gpu.func @_QPsub_device2(%arg0: !fir.ref) kernel { + cf.br ^bb1(%arg0 : !fir.ref) + ^bb1(%0: !fir.ref): // pred: ^bb0 + %1 = fir.declare %0 {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> !fir.ref + %cst = arith.constant 2.000000e+00 : f32 + fir.store %cst to %1 : !fir.ref + gpu.return + } + } + + func.func @_QQmain() attributes {fir.bindc_name = "main"} { + %0 = fir.alloca f32 + // CHECK: %[[ALLOCA:.*]] = fir.alloca f32 + %c1 = arith.constant 1 : index + %c11_i32 = arith.constant 11 : i32 + %c6_i32 = 
arith.constant 6 : i32 + %c1_i32 = arith.constant 1 : i32 + // CHECK: gpu.launch_func @cuda_device_mod::@_QPsub_device1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) dynamic_shared_memory_size %c0{{.*}} + cuf.kernel_launch @cuda_device_mod::@_QPsub_device1<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>() + + // CHECK: gpu.launch_func @cuda_device_mod::@_QPsub_device2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) dynamic_shared_memory_size %c0{{.*}} args(%[[ALLOCA]] : !fir.ref) + cuf.kernel_launch @cuda_device_mod::@_QPsub_device2<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>(%0) : (!fir.ref) + return + } + +} + +// ----- + +module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + gpu.module @cuda_device_mod { + gpu.func @_QMmod1Psub1(%arg0: !fir.ref>) kernel { + gpu.return + } + } + + func.func @_QMmod1Psub1(%arg0: !fir.ref> {cuf.data_attr = #cuf.cuda, fir.bindc_name = "adev"}) attributes {cuf.cluster_dims = #cuf.cluster_dims, cuf.proc_attr = #cuf.cuda_proc} { + return + } + func.func @_QMmod1Phost_sub() { + %c10 = arith.constant 10 : index + %0 = cuf.alloc !fir.array<10xi32> {bindc_name = "adev", data_attr = #cuf.cuda, uniq_name = "_QMmod1Fhost_subEadev"} -> !fir.ref> + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + %2:2 = hlfir.declare %0(%1) {data_attr = #cuf.cuda, uniq_name = 
"_QMmod1Fhost_subEadev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %c1_i32 = arith.constant 1 : i32 + %c10_i32 = arith.constant 10 : i32 + cuf.kernel_launch @_QMmod1Psub1<<<%c1_i32, %c1_i32, %c1_i32, %c10_i32, %c1_i32, %c1_i32>>>(%2#1) : (!fir.ref>) + return + } +} + +// CHECK-LABEL: func.func @_QMmod1Phost_sub() +// CHECK: gpu.launch_func @cuda_device_mod::@_QMmod1Psub1 clusters in (%c2{{.*}}, %c2{{.*}}, %c1{{.*}}) + From 255e441613e39a391e9f85d6a605cc9e46dcf273 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Tue, 29 Oct 2024 17:16:17 -0700 Subject: [PATCH 372/425] X86: Do not return invalid cost for fp16 conversion (#114128) Returning invalid instruction costs when converting from/to fp16 in `X86TTIImpl::getCastInstrCost` when there is no hardware support available was triggering asserts. This changes the code to return a large (arbitrary) number to model the fact that libcalls are used to implement the conversion. This also simplifies the code by only reporting costs for the scalar fp16 conversion; vectorized costs being left to the fallback assuming scalarization. This is a follow-up to assertion issues reported for the changes in #113195 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 12 +++++++----- .../Transforms/SLPVectorizer/X86/conversion-fp16.ll | 11 +++-------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index bae223243b3dc98..520284d1d7a4887 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3068,6 +3068,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (auto KindCost = Entry->Cost[CostKind]) return *KindCost; } + + if ((ISD == ISD::FP_ROUND && SimpleDstTy == MVT::f16) || + (ISD == ISD::FP_EXTEND && SimpleSrcTy == MVT::f16)) { + // fp16 conversions not covered by any table entries require a libcall. 
+ // Return a large (arbitrary) number to model this. + return InstructionCost(64); + } } // Fall back to legalized types. @@ -3174,11 +3181,6 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, TTI::CastContextHint::None, CostKind); } - if (ISD == ISD::FP_ROUND && LTDest.second.getScalarType() == MVT::f16) { - // Conversion requires a libcall. - return InstructionCost::getInvalid(); - } - // TODO: Allow non-throughput costs that aren't binary. auto AdjustCost = [&CostKind](InstructionCost Cost, InstructionCost N = 1) -> InstructionCost { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll index bcea147d724f53b..f23043f0c47f4a3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll @@ -453,14 +453,9 @@ define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) { ; ; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16( ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { -; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8 -; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8 -; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[S0]], align 4 -; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <8 x float> [[TMP1]] to <8 x half> -; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[S8]], align 4 -; CHECK-F16C-NEXT: [[TMP4:%.*]] = fptrunc <8 x float> [[TMP3]] to <8 x half> -; CHECK-F16C-NEXT: store <8 x half> [[TMP2]], ptr [[D0]], align 2 -; CHECK-F16C-NEXT: store <8 x half> [[TMP4]], ptr [[D8]], align 2 +; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4 +; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half> +; CHECK-F16C-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2 ; CHECK-F16C-NEXT: ret void ; ; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16( From 
13a3c4f97cf33279d597148ec48c71337aa16e9a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Oct 2024 17:46:52 -0700 Subject: [PATCH 373/425] [RISCV] Add OperandType to frmarg and rtzarg. (#114142) Teach RISCVInstrInfo::verifyInstruction to validate them. This is partially extracted from #89047, but that did not include the verification. --- llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 7 ++++++- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 6 ++++++ llvm/lib/Target/RISCV/RISCVInstrInfoF.td | 4 ++++ llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index d82f78498418da7..e18329c3d2dd495 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -330,7 +330,12 @@ enum OperandType : unsigned { OPERAND_RVKRNUM_1_10, OPERAND_RVKRNUM_2_14, OPERAND_SPIMM, - OPERAND_LAST_RISCV_IMM = OPERAND_SPIMM, + // Operand is a 3-bit rounding mode, '111' indicates FRM register. + // Represents 'frm' argument passing to floating-point operations. + OPERAND_FRMARG, + // Operand is a 3-bit rounding mode where only RTZ is valid. + OPERAND_RTZARG, + OPERAND_LAST_RISCV_IMM = OPERAND_RTZARG, // Operand is either a register or uimm5, this is used by V extension pseudo // instructions to represent a value that be passed as AVL to either vsetvli // or vsetivli. 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index a3963fadf3e417a..20e531657eb2860 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2536,6 +2536,12 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_SPIMM: Ok = (Imm & 0xf) == 0; break; + case RISCVOp::OPERAND_FRMARG: + Ok = RISCVFPRndMode::isValidRoundingMode(Imm); + break; + case RISCVOp::OPERAND_RTZARG: + Ok = Imm == RISCVFPRndMode::RTZ; + break; } if (!Ok) { ErrInfo = "Invalid immediate"; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index a134f37c774954b..da3f207a2faf728 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -134,6 +134,8 @@ def frmarg : Operand { let ParserMatchClass = FRMArg; let PrintMethod = "printFRMArg"; let DecoderMethod = "decodeFRMArg"; + let OperandType = "OPERAND_FRMARG"; + let OperandNamespace = "RISCVOp"; } // Variants of the rounding mode operand that default to 'rne'. 
This is used @@ -154,6 +156,8 @@ def frmarglegacy : Operand { let ParserMatchClass = FRMArgLegacy; let PrintMethod = "printFRMArgLegacy"; let DecoderMethod = "decodeFRMArg"; + let OperandType = "OPERAND_FRMARG"; + let OperandNamespace = "RISCVOp"; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index f62a7e1221122b8..2bdcfd21270e906 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -48,6 +48,8 @@ def rtzarg : Operand { let ParserMatchClass = RTZArg; let PrintMethod = "printFRMArg"; let DecoderMethod = "decodeRTZArg"; + let OperandType = "OPERAND_RTZARG"; + let OperandNamespace = "RISCVOp"; } //===----------------------------------------------------------------------===// From d9268289c3858c4ae877ff3bb90f28c160a977c8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Oct 2024 18:08:26 -0700 Subject: [PATCH 374/425] [RISCV] Add sha and supm to checks in riscv-profiles.c (#114123) --- clang/test/Driver/riscv-profiles.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/test/Driver/riscv-profiles.c b/clang/test/Driver/riscv-profiles.c index 2b4d19422874cfb..67e09d0e69ebc3e 100644 --- a/clang/test/Driver/riscv-profiles.c +++ b/clang/test/Driver/riscv-profiles.c @@ -147,6 +147,7 @@ // RVA23U64: "-target-feature" "+zvbb" // RVA23U64: "-target-feature" "+zvfhmin" // RVA23U64: "-target-feature" "+zvkt" +// RVA23U64: "-target-feature" "+supm" // RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23s64 \ // RUN: | FileCheck -check-prefix=RVA23S64 %s @@ -186,6 +187,7 @@ // RVA23S64: "-target-feature" "+zvbb" // RVA23S64: "-target-feature" "+zvfhmin" // RVA23S64: "-target-feature" "+zvkt" +// RVA23S64: "-target-feature" "+sha" // RVA23S64: "-target-feature" "+shcounterenw" // RVA23S64: "-target-feature" "+shgatpa" // RVA23S64: "-target-feature" "+shtvala" @@ -201,6 
+203,7 @@ // RVA23S64: "-target-feature" "+sstvala" // RVA23S64: "-target-feature" "+sstvecd" // RVA23S64: "-target-feature" "+ssu64xl" +// RVA23S64: "-target-feature" "+supm" // RVA23S64: "-target-feature" "+svade" // RVA23S64: "-target-feature" "+svbare" // RVA23S64: "-target-feature" "+svinval" From 8800b739bfe3ddc0bd32c158a016ffd0eee1e352 Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Wed, 30 Oct 2024 09:27:35 +0800 Subject: [PATCH 375/425] [RISCV] Refactor FP, SP and RA in RISCVFrameLowering.cpp. NFC (#113818) The usage of these registers is too fragmented: some are hard-coded and some are retrieved by calling a function. Also, some have comments giving the alias name and some don't. --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 48 +++++++++----------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index d70903519ecb05d..f5851f371545191 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -42,10 +42,19 @@ RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) /*TransientStackAlignment=*/getABIStackAlignment(STI.getTargetABI())), STI(STI) {} +// The register used to hold the frame pointer. +static constexpr Register FPReg = RISCV::X8; + +// The register used to hold the stack pointer. +static constexpr Register SPReg = RISCV::X2; + +// The register used to hold the return address. +static constexpr Register RAReg = RISCV::X1; + // Offsets which need to be scale by XLen representing locations of CSRs which // are given a fixed location by save/restore libcalls or Zcmp Push/Pop.
static const std::pair FixedCSRFIMap[] = { - {/*ra*/ RISCV::X1, -1}, {/*s0*/ RISCV::X8, -2}, + {/*ra*/ RAReg, -1}, {/*s0*/ FPReg, -2}, {/*s1*/ RISCV::X9, -3}, {/*s2*/ RISCV::X18, -4}, {/*s3*/ RISCV::X19, -5}, {/*s4*/ RISCV::X20, -6}, {/*s5*/ RISCV::X21, -7}, {/*s6*/ RISCV::X22, -8}, @@ -187,6 +196,7 @@ static int getLibCallID(const MachineFunction &MF, switch (MaxReg) { default: llvm_unreachable("Something has gone wrong!"); + // clang-format off case /*s11*/ RISCV::X27: return 12; case /*s10*/ RISCV::X26: return 11; case /*s9*/ RISCV::X25: return 10; @@ -198,8 +208,9 @@ static int getLibCallID(const MachineFunction &MF, case /*s3*/ RISCV::X19: return 4; case /*s2*/ RISCV::X18: return 3; case /*s1*/ RISCV::X9: return 2; - case /*s0*/ RISCV::X8: return 1; - case /*ra*/ RISCV::X1: return 0; + case /*s0*/ FPReg: return 1; + case /*ra*/ RAReg: return 0; + // clang-format on } } @@ -284,9 +295,9 @@ getPushPopEncodingAndNum(const Register MaxReg) { return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S2, 4); case RISCV::X9: /*s1*/ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S1, 3); - case RISCV::X8: /*s0*/ + case FPReg: /*s0*/ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0, 2); - case RISCV::X1: /*ra*/ + case RAReg: /*ra*/ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA, 1); } } @@ -372,12 +383,6 @@ uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding( return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign()); } -// Returns the register used to hold the frame pointer. -static Register getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; } - -// Returns the register used to hold the stack pointer. 
-static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } - static SmallVector getUnmanagedCSI(const MachineFunction &MF, const std::vector &CSI) { @@ -415,8 +420,6 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, MachineInstr::MIFlag Flag) const { assert(Amount != 0 && "Did not need to adjust stack pointer for RVV."); - const Register SPReg = getSPReg(STI); - // Optimize compile time offset case StackOffset Offset = StackOffset::getScalable(Amount); if (auto VLEN = STI.getRealVLen()) { @@ -479,7 +482,7 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); Expr.push_back(0); - if (Reg == RISCV::X2) + if (Reg == SPReg) Comment << "sp"; else Comment << printReg(Reg, &TRI); @@ -530,8 +533,6 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, const RISCVInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); - Register FPReg = getFPReg(STI); - Register SPReg = getSPReg(STI); Register BPReg = RISCVABI::getBPReg(); // Debug location must be unknown since the first debug location is used @@ -762,8 +763,6 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF, int64_t CFAOffset) const { const RISCVRegisterInfo *RI = STI.getRegisterInfo(); - Register SPReg = getSPReg(STI); - RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize), MachineInstr::FrameDestroy, getStackAlign()); } @@ -773,8 +772,6 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, const RISCVRegisterInfo *RI = STI.getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); auto *RVFI = MF.getInfo(); - Register FPReg = getFPReg(STI); - Register SPReg = getSPReg(STI); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. 
@@ -922,7 +919,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } if (FI >= MinCSFI && FI <= MaxCSFI) { - FrameReg = RISCV::X2; + FrameReg = SPReg; if (FirstSPAdjustAmount) Offset += StackOffset::getFixed(FirstSPAdjustAmount); @@ -969,13 +966,13 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } else { // VarSize objects must be empty in this case! assert(!MFI.hasVarSizedObjects()); - FrameReg = RISCV::X2; + FrameReg = SPReg; } } else { FrameReg = RI->getFrameRegister(MF); } - if (FrameReg == getFPReg(STI)) { + if (FrameReg == FPReg) { Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); // When using FP to access scalable vector objects, we need to minus // the frame size. @@ -1067,8 +1064,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, // Unconditionally spill RA and FP only if the function uses a frame // pointer. if (hasFP(MF)) { - SavedRegs.set(RISCV::X1); - SavedRegs.set(RISCV::X8); + SavedRegs.set(RAReg); + SavedRegs.set(FPReg); } // Mark BP as used if function has dedicated base pointer. if (hasBP(MF)) @@ -1328,7 +1325,6 @@ bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { - Register SPReg = RISCV::X2; DebugLoc DL = MI->getDebugLoc(); if (!hasReservedCallFrame(MF)) { From 63eb40eeb1b7aac57a181f6b5f9170ea94cef738 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Wed, 30 Oct 2024 09:49:26 +0800 Subject: [PATCH 376/425] [libc++] Deprecate and remove meaningless `` headers (#111615) This PR deprecates ``, ``, ``, and `` in C++17 and "removes" them in C++20 by special deprecation warnings. `` is previously missing. This PR also tries to add them, and then deprecates and "removes" ``. Papers: - https://wg21.link/P0063R3 - https://wg21.link/P0619R4 Closes #99985. 
--------- Co-authored-by: Louis Dionne --- libcxx/docs/ReleaseNotes/20.rst | 6 ++ libcxx/docs/Status/Cxx20Papers.csv | 2 +- libcxx/include/CMakeLists.txt | 1 + libcxx/include/ccomplex | 12 ++++ libcxx/include/ciso646 | 7 +++ libcxx/include/complex.h | 2 +- libcxx/include/cstdalign | 55 +++++++++++++++++++ libcxx/include/cstdbool | 12 ++++ libcxx/include/ctgmath | 14 ++++- libcxx/include/module.modulemap | 5 ++ libcxx/include/tgmath.h | 3 +- .../test/libcxx/clang_modules_include.gen.py | 13 ++++- libcxx/test/libcxx/double_include.gen.py | 13 ++++- libcxx/test/libcxx/header_inclusions.gen.py | 37 ++++++++----- libcxx/test/libcxx/include_as_c.sh.cpp | 1 + libcxx/test/libcxx/libcpp_version.gen.py | 13 ++++- libcxx/test/libcxx/no_assert_include.gen.py | 18 ++++-- .../test/libcxx/system_reserved_names.gen.py | 8 ++- .../test/libcxx/transitive_includes/cxx03.csv | 1 - .../test/libcxx/transitive_includes/cxx11.csv | 1 - .../test/libcxx/transitive_includes/cxx14.csv | 1 - .../test/libcxx/transitive_includes/cxx17.csv | 1 - .../test/libcxx/transitive_includes/cxx20.csv | 1 - .../test/libcxx/transitive_includes/cxx23.csv | 1 - .../test/libcxx/transitive_includes/cxx26.csv | 1 - .../depr.c.headers/ciso646.compile.pass.cpp | 4 +- .../stdalign_h.compile.pass.cpp | 33 +++++++++++ .../depr/depr.cpp.headers/ccomplex.verify.cpp | 25 +++++++++ .../depr/depr.cpp.headers/ciso646.verify.cpp | 18 ++++++ .../depr.cpp.headers/cstdalign.verify.cpp | 25 +++++++++ .../depr/depr.cpp.headers/cstdbool.verify.cpp | 25 +++++++++ .../depr/depr.cpp.headers/ctgmath.verify.cpp | 25 +++++++++ .../cstdalign.compile.pass.cpp | 29 ++++++++++ .../support.runtime/cstdbool.pass.cpp | 4 +- .../test/std/numerics/c.math/ctgmath.pass.cpp | 4 +- .../complex.number/ccmplx/ccomplex.pass.cpp | 4 +- libcxx/utils/libcxx/header_information.py | 12 +++- .../gn/secondary/libcxx/include/BUILD.gn | 1 + 38 files changed, 394 insertions(+), 44 deletions(-) create mode 100644 libcxx/include/cstdalign create mode 100644 
libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp create mode 100644 libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp create mode 100644 libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp create mode 100644 libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp create mode 100644 libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp create mode 100644 libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp create mode 100644 libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 38b8df3b2a7718b..bf3aafe6139ee95 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -38,6 +38,7 @@ What's New in Libc++ 20.0.0? Implemented Papers ------------------ +- P0619R4: Reviewing Deprecated Facilities of C++17 for C++20 (`Github `__) - P2747R2: ``constexpr`` placement new (`Github `__) - P2609R3: Relaxing Ranges Just A Smidge (`Github `__) - P2985R0: A type trait for detecting virtual base classes (`Github `__) @@ -89,6 +90,11 @@ Deprecations and Removals the ``_LIBCPP_VERBOSE_ABORT_NOT_NOEXCEPT`` macro can be defined to make the function non-``noexcept``. That macro will be removed in LLVM 21. +- ``<ccomplex>``, ``<cstdalign>`` (previously missing), ``<cstdbool>``, and ``<ctgmath>`` are deprecated since C++17 as + specified by the standard. They, together with ``<ciso646>``, are removed in C++20, but libc++ still provides these + headers as an extension and only deprecates them. The ``_LIBCPP_DISABLE_DEPRECATION_WARNINGS`` macro can be defined to + suppress deprecation for these headers. 
+ Upcoming Deprecations and Removals ---------------------------------- diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv index 9a057be8ad0519c..5cd77be4d58def7 100644 --- a/libcxx/docs/Status/Cxx20Papers.csv +++ b/libcxx/docs/Status/Cxx20Papers.csv @@ -34,7 +34,7 @@ "`P0528R3 `__","The Curious Case of Padding Bits, Featuring Atomic Compare-and-Exchange","2018-06 (Rapperswil)","","","" "`P0542R5 `__","Support for contract based programming in C++","2018-06 (Rapperswil)","|Nothing To Do|","n/a","Pulled at the 2019-07 meeting in Cologne" "`P0556R3 `__","Integral power-of-2 operations","2018-06 (Rapperswil)","|Complete|","9.0","" -"`P0619R4 `__","Reviewing Deprecated Facilities of C++17 for C++20","2018-06 (Rapperswil)","|Partial|","","Only sections D.7, D.8, D.9, D.10, D.11, D.12, and D.13 are implemented. Section D.4 remains undone." +"`P0619R4 `__","Reviewing Deprecated Facilities of C++17 for C++20","2018-06 (Rapperswil)","|Complete|","20.0","Removed headers are still provided as an extension, but with deprecation warnings" "`P0646R1 `__","Improving the Return Value of Erase-Like Algorithms","2018-06 (Rapperswil)","|Complete|","10.0","" "`P0722R3 `__","Efficient sized delete for variable sized classes","2018-06 (Rapperswil)","|Complete|","9.0","" "`P0758R1 `__","Implicit conversion traits and utility functions","2018-06 (Rapperswil)","|Complete|","","" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index e84a55e25f2fa42..87eaf64b2450171 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -923,6 +923,7 @@ set(files coroutine csetjmp csignal + cstdalign cstdarg cstdbool cstddef diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex index 94d2c8d7d003d4f..d379c9e7f0174a9 100644 --- a/libcxx/include/ccomplex +++ b/libcxx/include/ccomplex @@ -23,4 +23,16 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_ccomplex 
_LIBCPP_DEPRECATED_("removed in C++20. Include instead.") = void; +using __use_standard_header_ccomplex = __standard_header_ccomplex; + +#elif _LIBCPP_STD_VER >= 17 + +using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include instead.") = void; +using __use_standard_header_ccomplex = __standard_header_ccomplex; + +#endif + #endif // _LIBCPP_CCOMPLEX diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646 index 1d859f08fac572e..5fcac79e38a7f2f 100644 --- a/libcxx/include/ciso646 +++ b/libcxx/include/ciso646 @@ -21,4 +21,11 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_ciso646 _LIBCPP_DEPRECATED_("removed in C++20. Include instead.") = void; +using __use_standard_header_ciso646 = __standard_header_ciso646; + +#endif + #endif // _LIBCPP_CISO646 diff --git a/libcxx/include/complex.h b/libcxx/include/complex.h index a3da21c843f3650..89595ae2068a641 100644 --- a/libcxx/include/complex.h +++ b/libcxx/include/complex.h @@ -24,7 +24,7 @@ #endif #ifdef __cplusplus -# include +# include #elif __has_include_next() # include_next #endif diff --git a/libcxx/include/cstdalign b/libcxx/include/cstdalign new file mode 100644 index 000000000000000..e6a2a3c71774220 --- /dev/null +++ b/libcxx/include/cstdalign @@ -0,0 +1,55 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_CSTDALIGN +#define _LIBCPP_CSTDALIGN + +/* + cstdalign synopsis + +Macros: + + __alignas_is_defined + __alignof_is_defined + +*/ + +#include <__config> + +// is not provided by libc++ +#if __has_include() +# include +# ifdef _LIBCPP_STDALIGN_H +# error "If libc++ starts defining , the __has_include check should move to libc++'s " +# endif +#endif + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#undef __alignas_is_defined +#define __alignas_is_defined 1 + +#undef __alignof_is_defined +#define __alignof_is_defined 1 + +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") = void; +using __use_standard_header_cstdalign = __standard_header_cstdalign; + +#elif _LIBCPP_STD_VER >= 17 + +using __standard_header_cstdalign _LIBCPP_DEPRECATED = void; +using __use_standard_header_cstdalign = __standard_header_cstdalign; + +#endif + +#endif // _LIBCPP_CSTDALIGN diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool index ef731c021a4ab88..1d627258e10c094 100644 --- a/libcxx/include/cstdbool +++ b/libcxx/include/cstdbool @@ -28,4 +28,16 @@ Macros: #undef __bool_true_false_are_defined #define __bool_true_false_are_defined 1 +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") = void; +using __use_standard_header_cstdbool = __standard_header_cstdbool; + +#elif _LIBCPP_STD_VER >= 17 + +using __standard_header_cstdbool _LIBCPP_DEPRECATED = void; +using __use_standard_header_cstdbool = __standard_header_cstdbool; + +#endif + #endif // _LIBCPP_CSTDBOOL diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath index 6237979be4906cc..7dbe952f021b74f 100644 --- a/libcxx/include/ctgmath +++ b/libcxx/include/ctgmath @@ -18,11 +18,23 @@ */ -#include #include +#include #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_ctgmath _LIBCPP_DEPRECATED_("removed in C++20. Include and instead.") = void; +using __use_standard_header_ctgmath = __standard_header_ctgmath; + +#elif _LIBCPP_STD_VER >= 17 + +using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include and instead.") = void; +using __use_standard_header_ctgmath = __standard_header_ctgmath; + +#endif + #endif // _LIBCPP_CTGMATH diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index c3d080007319927..af8c3c15eb2767f 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1109,6 +1109,11 @@ module std [system] { export * } + module cstdalign { + header "cstdalign" + export * + } + module cstdarg { header "cstdarg" export * diff --git a/libcxx/include/tgmath.h b/libcxx/include/tgmath.h index e6f0a4ab2611fa8..1c5058cb065a91c 100644 --- a/libcxx/include/tgmath.h +++ b/libcxx/include/tgmath.h @@ -24,7 +24,8 @@ #endif #ifdef __cplusplus -# include +# include +# include #else # if __has_include_next() # include_next diff --git a/libcxx/test/libcxx/clang_modules_include.gen.py b/libcxx/test/libcxx/clang_modules_include.gen.py index bc028f2a0809aa0..b897984f898819f 100644 --- a/libcxx/test/libcxx/clang_modules_include.gen.py +++ b/libcxx/test/libcxx/clang_modules_include.gen.py @@ -17,10 +17,15 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: - print(f"""\ + print( + f"""\ //--- {header}.compile.pass.cpp // RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only @@ -41,9 +46,11 @@ // UNSUPPORTED: LIBCXX-PICOLIBC-FIXME {lit_header_restrictions.get(header, '')} 
+{lit_header_undeprecations.get(header, '')} #include <{header}> -""") +""" + ) print( f"""\ diff --git a/libcxx/test/libcxx/double_include.gen.py b/libcxx/test/libcxx/double_include.gen.py index afc2947dbece94d..f58e72f94a35335 100644 --- a/libcxx/test/libcxx/double_include.gen.py +++ b/libcxx/test/libcxx/double_include.gen.py @@ -15,12 +15,18 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: - print(f"""\ + print( + f"""\ //--- {header}.sh.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} // RUN: %{{cxx}} -c %s -o %t.first.o %{{flags}} %{{compile_flags}} // RUN: %{{cxx}} -c %s -o %t.second.o -DWITH_MAIN %{{flags}} %{{compile_flags}} @@ -32,4 +38,5 @@ #if defined(WITH_MAIN) int main(int, char**) {{ return 0; }} #endif -""") +""" + ) diff --git a/libcxx/test/libcxx/header_inclusions.gen.py b/libcxx/test/libcxx/header_inclusions.gen.py index e5def1ad4cb70d9..739caf915c09a0c 100644 --- a/libcxx/test/libcxx/header_inclusions.gen.py +++ b/libcxx/test/libcxx/header_inclusions.gen.py @@ -12,32 +12,43 @@ # RUN: %{python} %s %{libcxx-dir}/utils import sys + sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers, mandatory_inclusions +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, + mandatory_inclusions, +) for header in public_headers: - header_guard = lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}" + header_guard = ( + lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}" + ) - # has no header guards - if header == 'cassert': - checks = '' - else: - checks = f''' + # has no header guards + if header == "cassert": + checks = "" + else: + checks = f""" 
#ifndef {header_guard(header)} # error <{header}> was expected to define a header guard {header_guard(header)} #endif -''' - for includee in mandatory_inclusions.get(header, []): - checks += f''' +""" + for includee in mandatory_inclusions.get(header, []): + checks += f""" #ifndef {header_guard(includee)} # error <{header}> was expected to include <{includee}> #endif -''' +""" - print(f"""\ + print( + f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #include <{header}> {checks} -""") +""" + ) diff --git a/libcxx/test/libcxx/include_as_c.sh.cpp b/libcxx/test/libcxx/include_as_c.sh.cpp index c9f8dfd9a5a9222..204b830462cf944 100644 --- a/libcxx/test/libcxx/include_as_c.sh.cpp +++ b/libcxx/test/libcxx/include_as_c.sh.cpp @@ -34,6 +34,7 @@ #endif #include #include +#include #include #include #include diff --git a/libcxx/test/libcxx/libcpp_version.gen.py b/libcxx/test/libcxx/libcpp_version.gen.py index a9995295e21e4f4..b30623fe2c388bc 100644 --- a/libcxx/test/libcxx/libcpp_version.gen.py +++ b/libcxx/test/libcxx/libcpp_version.gen.py @@ -12,16 +12,23 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: - print(f"""\ + print( + f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #include <{header}> #ifndef _LIBCPP_VERSION # error <{header}> does not seem to define _LIBCPP_VERSION #endif -""") +""" + ) diff --git a/libcxx/test/libcxx/no_assert_include.gen.py b/libcxx/test/libcxx/no_assert_include.gen.py index 67ab98603ca8fde..e0dbc3d815f31b1 100644 --- a/libcxx/test/libcxx/no_assert_include.gen.py +++ b/libcxx/test/libcxx/no_assert_include.gen.py @@ -12,20 +12,28 @@ # RUN: %{python} %s %{libcxx-dir}/utils import 
sys + sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: - if header == 'cassert': - continue + if header == "cassert": + continue - print(f"""\ + print( + f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #include <{header}> #ifdef assert # error "Do not include cassert or assert.h in standard header files" #endif -""") +""" + ) diff --git a/libcxx/test/libcxx/system_reserved_names.gen.py b/libcxx/test/libcxx/system_reserved_names.gen.py index e29e7a2cdd61449..f01126249c88171 100644 --- a/libcxx/test/libcxx/system_reserved_names.gen.py +++ b/libcxx/test/libcxx/system_reserved_names.gen.py @@ -13,14 +13,20 @@ # RUN: %{python} %s %{libcxx-dir}/utils import sys + sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: print( f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #define SYSTEM_RESERVED_NAME This name should not be used in libc++ diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 2dc84963f0891ee..48c501863cb76ca 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -458,7 +458,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 2dc84963f0891ee..48c501863cb76ca 100644 --- 
a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -458,7 +458,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index 27e229755735840..6191c9012c631b4 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -467,7 +467,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index b17eb1f2347a86c..5d46162e3f8996d 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -458,7 +458,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 9efec327889c1de..20fe9878ce3eae0 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -446,7 +446,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index e17f732663a9b28..5ee89ec307cc296 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -238,7 +238,6 @@ coroutine limits coroutine version cstddef version ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv 
b/libcxx/test/libcxx/transitive_includes/cxx26.csv index c56f5cdfad00727..ee17223e66bee4c 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -238,7 +238,6 @@ coroutine limits coroutine version cstddef version ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp index 4dff57f84f202cd..764f4d02f44f44a 100644 --- a/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// -// +// // removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include diff --git a/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp new file mode 100644 index 000000000000000..e7290aab2c6616b --- /dev/null +++ b/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// test +// +// Even though is not provided by libc++, +// we still test that using it with libc++ on the search path will work. + +// TODO: GCC doesn't provide a proper for C++ until 15. 
+// UNSUPPORTED: gcc + +#include + +#ifndef __alignas_is_defined +# error __alignas_is_defined not defined +#endif + +#ifndef __alignof_is_defined +# error __alignof_is_defined not defined +#endif + +#ifdef alignas +# error alignas should not be defined +#endif + +#ifdef alignof +# error alignof should not be defined +#endif diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp new file mode 100644 index 000000000000000..0eaf82ce5cef016 --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is deprecated in C++17 and removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14 +// UNSUPPORTED: clang-modules-build + +#include "test_macros.h" + +#include + +#if TEST_STD_VER >= 20 +// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: removed in C++20. Include instead.}} +#else +// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: Include instead.}} +#endif diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp new file mode 100644 index 000000000000000..04acd1008154856 --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: clang-modules-build + +#include +// expected-warning@ciso646:* {{'__standard_header_ciso646' is deprecated: removed in C++20. Include instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp new file mode 100644 index 000000000000000..dc9f1af55b3f14b --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is deprecated in C++17 and removed in C++20 +// When built with modules, should be omitted. 
+ +// UNSUPPORTED: c++03, c++11, c++14 +// UNSUPPORTED: clang-modules-build + +#include "test_macros.h" + +#include + +#if TEST_STD_VER >= 20 +// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated: removed in C++20.}} +#else +// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated}} +#endif diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp new file mode 100644 index 000000000000000..eddefe14d35eac8 --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is deprecated in C++17 and removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14 +// UNSUPPORTED: clang-modules-build + +#include "test_macros.h" + +#include + +#if TEST_STD_VER >= 20 +// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated: removed in C++20.}} +#else +// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated}} +#endif diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp new file mode 100644 index 000000000000000..097ab1643d15afd --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is deprecated in C++17 and removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14 +// UNSUPPORTED: clang-modules-build + +#include "test_macros.h" + +#include + +#if TEST_STD_VER >= 20 +// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: removed in C++20. Include and instead.}} +#else +// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: Include and instead.}} +#endif diff --git a/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp new file mode 100644 index 000000000000000..fbbaf9b2d136f94 --- /dev/null +++ b/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// test <cstdalign> // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + +#include <cstdalign> + +#ifndef __alignas_is_defined +# error __alignas_is_defined not defined +#endif + +#ifndef __alignof_is_defined +# error __alignof_is_defined not defined +#endif + +#ifdef alignas +# error alignas should not be defined +#endif + +#ifdef alignof +# error alignof should not be defined +#endif diff --git a/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp index 1d0e9b06a43d2eb..9a35eea507c40ad 100644 --- a/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -// test <cstdbool> +// test <cstdbool> // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include <cstdbool> diff --git a/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp b/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp index 2c8d054fbc527df..2e4679980577a9b 100644 --- a/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp +++ b/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -// <ctgmath> +// <ctgmath> // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include <ctgmath> diff --git a/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp b/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp index cc3f8cd6a9beb95..0ed116c6410639c 100644 --- 
a/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -// +// // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py index 3b12dcb9f56c0b7..cac620e4f1fe777 100644 --- a/libcxx/utils/libcxx/header_information.py +++ b/libcxx/utils/libcxx/header_information.py @@ -66,6 +66,7 @@ def is_cstd(self) -> bool: "cmath", "csetjmp", "csignal", + "cstdalign", "cstdarg", "cstdbool", "cstddef", @@ -92,7 +93,7 @@ def has_cxx20_module(self) -> bool: experimental headers. """ # These headers have been removed in C++20 so are never part of a module. - removed_in_20 = ["ccomplex", "ciso646", "cstdbool", "ctgmath"] + removed_in_20 = ["ccomplex", "ciso646", "cstdalign", "cstdbool", "ctgmath"] return self.is_public() and not self.is_experimental() and not self.is_C_compatibility() and not self._name in removed_in_20 def is_cxx03_frozen_header(self) -> bool: @@ -236,6 +237,15 @@ def __hash__(self) -> int: "wctype.h": "// UNSUPPORTED: no-wide-characters", } +# Undeprecate headers that are deprecated in C++17 and removed in C++20. 
+lit_header_undeprecations = { + "ccomplex": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", + "ciso646": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", + "cstdalign": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", + "cstdbool": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", + "ctgmath": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", +} + # This table was produced manually, by grepping the TeX source of the Standard's # library clauses for the string "#include". Each header's synopsis contains # explicit "#include" directives for its mandatory inclusions. diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 8121e34dcf6eff7..776f1d32c5f520f 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -995,6 +995,7 @@ if (current_toolchain == default_toolchain) { "coroutine", "csetjmp", "csignal", + "cstdalign", "cstdarg", "cstdbool", "cstddef", From facdae62b7be4fe177c8a130c68aef0305dc6eb3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Oct 2024 19:14:54 -0700 Subject: [PATCH 377/425] [MCInstPrinter] Make printRegName non-const Similar to printInst. printRegName may change states (e.g. #113834). 
--- llvm/include/llvm/MC/MCInstPrinter.h | 4 ++-- llvm/include/llvm/MC/MCParser/MCAsmParser.h | 2 +- llvm/lib/MC/MCInstPrinter.cpp | 5 ++--- llvm/lib/MC/MCParser/AsmParser.cpp | 4 ++-- llvm/lib/MC/MCParser/MasmParser.cpp | 4 ++-- llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp | 4 ++-- llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h | 4 ++-- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 2 +- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h | 2 +- llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp | 2 +- llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h | 2 +- llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp | 4 ++-- llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h | 2 +- llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp | 2 +- llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h | 2 +- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 2 +- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h | 2 +- llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp | 2 +- llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h | 2 +- .../Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp | 2 +- .../lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp | 2 +- llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h | 2 +- llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp | 2 +- llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h | 2 +- llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp | 2 +- llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h | 2 +- llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp | 2 +- llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h | 2 +- llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp | 2 +- llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h | 2 +- llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 2 +- llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h | 2 +- 
llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp | 2 +- llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h | 2 +- .../Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp | 2 +- llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h | 2 +- .../Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp | 2 +- .../Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h | 2 +- .../Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp | 3 +-- .../Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h | 4 ++-- llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp | 2 +- llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h | 2 +- .../WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp | 3 +-- .../Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h | 2 +- llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp | 2 +- llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h | 2 +- llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp | 2 +- llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h | 2 +- llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp | 2 +- llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h | 2 +- llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp | 2 +- llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h | 2 +- 53 files changed, 61 insertions(+), 64 deletions(-) diff --git a/llvm/include/llvm/MC/MCInstPrinter.h b/llvm/include/llvm/MC/MCInstPrinter.h index 60a901e3d0deae6..0b9c738a7a0a319 100644 --- a/llvm/include/llvm/MC/MCInstPrinter.h +++ b/llvm/include/llvm/MC/MCInstPrinter.h @@ -144,7 +144,7 @@ class MCInstPrinter { StringRef getOpcodeName(unsigned Opcode) const; /// Print the assembler register name. 
- virtual void printRegName(raw_ostream &OS, MCRegister Reg) const; + virtual void printRegName(raw_ostream &OS, MCRegister Reg); bool getUseMarkup() const { return UseMarkup; } void setUseMarkup(bool Value) { UseMarkup = Value; } @@ -152,7 +152,7 @@ class MCInstPrinter { bool getUseColor() const { return UseColor; } void setUseColor(bool Value) { UseColor = Value; } - WithMarkup markup(raw_ostream &OS, Markup M) const; + WithMarkup markup(raw_ostream &OS, Markup M); bool getPrintImmHex() const { return PrintImmHex; } void setPrintImmHex(bool Value) { PrintImmHex = Value; } diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index faa72d5f3144c43..70fba69778536e4 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -206,7 +206,7 @@ class MCAsmParser { SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, - const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0; + MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0; /// Emit a note at the location \p L, with the message \p Msg. 
virtual void Note(SMLoc L, const Twine &Msg, diff --git a/llvm/lib/MC/MCInstPrinter.cpp b/llvm/lib/MC/MCInstPrinter.cpp index e4faeba04a8fd7f..488e34a6d53954d 100644 --- a/llvm/lib/MC/MCInstPrinter.cpp +++ b/llvm/lib/MC/MCInstPrinter.cpp @@ -43,7 +43,7 @@ StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { return MII.getName(Opcode); } -void MCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void MCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { llvm_unreachable("Target should implement this"); } @@ -224,8 +224,7 @@ format_object MCInstPrinter::formatHex(uint64_t Value) const { llvm_unreachable("unsupported print style"); } -MCInstPrinter::WithMarkup MCInstPrinter::markup(raw_ostream &OS, - Markup S) const { +MCInstPrinter::WithMarkup MCInstPrinter::markup(raw_ostream &OS, Markup S) { return WithMarkup(OS, S, getUseMarkup(), getUseColor()); } diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 4774e5112af535f..ecccb228c8c3875 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -264,7 +264,7 @@ class AsmParser : public MCAsmParser { SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, - const MCInstrInfo *MII, const MCInstPrinter *IP, + const MCInstrInfo *MII, MCInstPrinter *IP, MCAsmParserSemaCallback &SI) override; bool parseExpression(const MCExpr *&Res); @@ -6006,7 +6006,7 @@ bool AsmParser::parseMSInlineAsm( SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, - const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { + MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { SmallVector InputDecls; SmallVector OutputDecls; SmallVector InputDeclsAddressOf; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index d88fd09a1aa07c6..a7f37d81f640929 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ 
b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -539,7 +539,7 @@ class MasmParser : public MCAsmParser { SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, - const MCInstrInfo *MII, const MCInstPrinter *IP, + const MCInstrInfo *MII, MCInstPrinter *IP, MCAsmParserSemaCallback &SI) override; bool parseExpression(const MCExpr *&Res); @@ -7340,7 +7340,7 @@ bool MasmParser::parseMSInlineAsm( SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, - const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { + MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { SmallVector InputDecls; SmallVector OutputDecls; SmallVector InputDeclsAddressOf; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 762a7af8c3ddb3d..2ee2ee5a6fa500f 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -59,12 +59,12 @@ bool AArch64InstPrinter::applyTargetSpecificCLOption(StringRef Opt) { return false; } -void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << getRegisterName(Reg); } void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg, - unsigned AltIdx) const { + unsigned AltIdx) { markup(OS, Markup::Register) << getRegisterName(Reg, AltIdx); } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index e7b62b3203681bc..9cf2674ae943aa7 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -29,8 +29,8 @@ class AArch64InstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, 
raw_ostream &O) override; - void printRegName(raw_ostream &OS, MCRegister Reg) const override; - void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx) const; + void printRegName(raw_ostream &OS, MCRegister Reg) override; + void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx); // Autogenerated by tblgen. std::pair getMnemonic(const MCInst *MI) override; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index dd8d93c3f0b72a0..88caf8196b3c90a 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -24,7 +24,7 @@ using namespace llvm; using namespace llvm::AMDGPU; -void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { // FIXME: The current implementation of // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this // as an integer or we provide a name which represents a physical register. 
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index a72e0fe6ea769f5..4729b8a6aa6f401 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -29,7 +29,7 @@ class AMDGPUInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; static void printRegOperand(MCRegister Reg, raw_ostream &O, diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp index d76c2810c39f8c6..e669b9479369d63 100644 --- a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp +++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp @@ -93,7 +93,7 @@ static const char *ARCCondCodeToString(ARCCC::CondCode CC) { return BadConditionCode(CC); } -void ARCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void ARCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << StringRef(getRegisterName(Reg)).lower(); } diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h index baf4a6915b70657..c4bd73448ca71b7 100644 --- a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h +++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h @@ -30,7 +30,7 @@ class ARCInstPrinter : public MCInstPrinter { void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t 
Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; void printCCOperand(const MCInst *MI, int OpNum, raw_ostream &O); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp index e4a2f8c8f2ea0ce..5a6895a4ab84ef3 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp @@ -50,7 +50,7 @@ static unsigned translateShiftImm(unsigned imm) { } static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, - unsigned ShImm, const ARMInstPrinter &printer) { + unsigned ShImm, ARMInstPrinter &printer) { if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm)) return; O << ", "; @@ -81,7 +81,7 @@ bool ARMInstPrinter::applyTargetSpecificCLOption(StringRef Opt) { return false; } -void ARMInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void ARMInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << getRegisterName(Reg, DefaultAltIdx); } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h index 494a644cf545462..cd1dddc5f331a33 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h @@ -27,7 +27,7 @@ class ARMInstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; // Autogenerated by tblgen. 
std::pair getMnemonic(const MCInst *MI) override; diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp index 9af7958112fce30..a4b0d8488cf53b3 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp @@ -82,7 +82,7 @@ void CSKYInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void CSKYInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void CSKYInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { if (PrintBranchImmAsAddress) O << getRegisterName(Reg, ABIRegNames ? CSKY::ABIRegAltName : CSKY::NoRegAltName); diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h index 461d7f6f12b371b..16eccfdfb5ce5bc 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h @@ -31,7 +31,7 @@ class CSKYInstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier = nullptr); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index de5c8b86978a827..e4e84a80b5d0bc7 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -26,7 +26,7 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" -void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister 
Reg) { O << getRegisterName(Reg); } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 38a9081c93fe794..fe37cd91dabc6ad 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -30,7 +30,7 @@ class HexagonInstPrinter : public MCInstPrinter { void printInst(MCInst const *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; static char const *getRegisterName(MCRegister Reg); diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp index 0265a75fb346c9f..4b5751eaedda052 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "LanaiGenAsmWriter.inc" -void LanaiInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void LanaiInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << StringRef(getRegisterName(Reg)).lower(); } diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h index 55a254036fee5b6..851613b27e3dd90 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h @@ -49,7 +49,7 @@ class LanaiInstPrinter : public MCInstPrinter { unsigned OpIdx, unsigned PrintMethodIdx, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; private: bool printAlias(const MCInst *MI, raw_ostream &Ostream); diff --git 
a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp index cb2521db5217e82..e3007cfe3d401b8 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp @@ -56,7 +56,7 @@ void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void LoongArchInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void LoongArchInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { O << '$' << getRegisterName(Reg); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h index 4e6092bfcb12829..8cda3fdb4510e59 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h @@ -28,7 +28,7 @@ class LoongArchInstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printAtomicMemOp(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp index 84800fc762cbb81..68ac15b57508c18 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp @@ -41,7 +41,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "M68kGenAsmWriter.inc" -void M68kInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void M68kInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << "%" << getRegisterName(Reg); } diff --git 
a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h index 0963176304587c2..d6d17ca9568e024 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h @@ -34,7 +34,7 @@ class M68kInstPrinter : public MCInstPrinter, void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp index 3726c600f4a7b82..d8a27f34c6fd13f 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp @@ -26,7 +26,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "MSP430GenAsmWriter.inc" -void MSP430InstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void MSP430InstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { O << getRegisterName(Reg); } diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h index 40605b92bcb01a6..413492b8efeedaf 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h @@ -22,7 +22,7 @@ namespace llvm { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git 
a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp index 1518a539782efb5..2fd1b344eb687ed 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp @@ -72,7 +72,7 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) { llvm_unreachable("Impossible condition code!"); } -void MipsInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void MipsInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << '$' << StringRef(getRegisterName(Reg)).lower(); } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h index 0652b237509fe3f..8e3b4614a4aade6 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h @@ -84,7 +84,7 @@ class MipsInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp index 9b589284463294c..4211ae5a2eebcde 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp @@ -34,7 +34,7 @@ NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} -void NVPTXInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void NVPTXInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { // Decode the virtual register // Must be kept in sync with 
NVPTXAsmPrinter::encodeVirtualRegister unsigned RCId = (Reg.id() >> 28); diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h index e8a4a6dbdd5324b..63207e8a975ace2 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h @@ -24,7 +24,7 @@ class NVPTXInstPrinter : public MCInstPrinter { NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index 9a4291c90408d60..7511e24f705c18b 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -47,7 +47,7 @@ FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden, #define PRINT_ALIAS_INSTR #include "PPCGenAsmWriter.inc" -void PPCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void PPCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { const char *RegName = getRegisterName(Reg); OS << RegName; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h index 6ba3eb4c79dc990..1b9365fa04961c9 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h @@ -31,7 +31,7 @@ class PPCInstPrinter : public MCInstPrinter { const MCRegisterInfo &MRI, Triple T) : MCInstPrinter(MAI, MII, MRI), TT(T) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void 
printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 1f27c934baf0dc4..1445e9da4a622de 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -75,7 +75,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { markup(O, Markup::Register) << getRegisterName(Reg); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index 77cc7a67e88920d..c15fd591b9e9565 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -28,7 +28,7 @@ class RISCVInstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier = nullptr); diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp index 5b407a8b6f54a0f..4bba54463103bcb 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp @@ -38,7 +38,7 @@ bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const { return (STI.hasFeature(Sparc::FeatureV9)) != 0; } -void SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void 
SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << '%' << getRegisterName(Reg); } diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h index 207a970228058da..52321d562118583 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h @@ -24,7 +24,7 @@ class SparcInstPrinter : public MCInstPrinter { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx) const; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp index 05113010794e0b6..72b7bd60276a7a8 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp @@ -19,7 +19,7 @@ using namespace llvm; void SystemZGNUInstPrinter::printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const { + raw_ostream &O) { const char *RegName = getRegisterName(Reg); markup(O, Markup::Register) << '%' << RegName; } diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h index 8f62ae0e16c006d..7095e325c70bc00 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h @@ -38,7 +38,7 @@ class SystemZGNUInstPrinter : public SystemZInstPrinterCommon { private: void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const override; + raw_ostream &O) override; }; } // end namespace 
llvm diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp index 9abd408324c0672..ef9881932f7c085 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp @@ -19,7 +19,7 @@ using namespace llvm; void SystemZHLASMInstPrinter::printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const { + raw_ostream &O) { const char *RegName = getRegisterName(Reg); // Skip register prefix so that only register number is left assert(isalpha(RegName[0]) && isdigit(RegName[1])); diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h index 9a69e012c72942d..ffccbec36c7491a 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h @@ -37,7 +37,7 @@ class SystemZHLASMInstPrinter : public SystemZInstPrinterCommon { private: void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const override; + raw_ostream &O) override; }; } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp index 00560ab1f4b18d9..fe0f3874765614d 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp @@ -57,8 +57,7 @@ void SystemZInstPrinterCommon::printOperand(const MCOperand &MO, llvm_unreachable("Invalid operand"); } -void SystemZInstPrinterCommon::printRegName(raw_ostream &O, - MCRegister Reg) const { +void SystemZInstPrinterCommon::printRegName(raw_ostream &O, MCRegister Reg) { printFormattedRegName(&MAI, Reg, O); } diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h 
b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h index 9a972824f7ffb51..1a11e421691ae38 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h @@ -36,10 +36,10 @@ class SystemZInstPrinterCommon : public MCInstPrinter { void printOperand(const MCOperand &MO, const MCAsmInfo *MAI, raw_ostream &O); virtual void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const {} + raw_ostream &O) {} // Override MCInstPrinter. - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; protected: template diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp index 8261b5aa7b4e13a..47455a9a0274c2e 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "VEGenAsmWriter.inc" -void VEInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void VEInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { // Generic registers have identical register name among register classes. unsigned AltIdx = VE::AsmName; // Misc registers have each own name, so no use alt-names. 
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h index 65660a49c5e4dd3..d5e0ebd3596ca86 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h @@ -24,7 +24,7 @@ class VEInstPrinter : public MCInstPrinter { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index 4c29b59b3302e47..026f859b15d7152 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -38,8 +38,7 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} -void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, - MCRegister Reg) const { +void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { assert(Reg.id() != WebAssembly::UnusedReg); // Note that there's an implicit local.get/local.set here! 
OS << "$" << Reg.id(); diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h index b499926ab82965b..e7c5e14973b630a 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h @@ -35,7 +35,7 @@ class WebAssemblyInstPrinter final : public MCInstPrinter { WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp index 58b4527af6557b2..c811d621e60eb7b 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -35,7 +35,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" -void X86ATTInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void X86ATTInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << '%' << getRegisterName(Reg); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h index 83040c112b68850..7e525e232362299 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h @@ -23,7 +23,7 @@ class X86ATTInstPrinter final : public X86InstPrinterCommon { const MCRegisterInfo &MRI) : X86InstPrinterCommon(MAI, MII, MRI), HasCustomInstComment(false) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void 
printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp index cd8b9aa62573001..8e7dae229275bbb 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -33,7 +33,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter1.inc" -void X86IntelInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void X86IntelInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << getRegisterName(Reg); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h index a34c06782f40422..988ab9626c3fd7e 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h @@ -24,7 +24,7 @@ class X86IntelInstPrinter final : public X86InstPrinterCommon { const MCRegisterInfo &MRI) : X86InstPrinterCommon(MAI, MII, MRI) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS); diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp index eda90d3101ab481..707c4a790872805 100644 --- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp +++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #include "XCoreGenAsmWriter.inc" -void XCoreInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { 
+void XCoreInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << StringRef(getRegisterName(Reg)).lower(); } diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h index 916ca99968fbb0c..2b47de457322ee2 100644 --- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h +++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h @@ -31,7 +31,7 @@ class XCoreInstPrinter : public MCInstPrinter { void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp index fe1dc0e2e483e72..e04d7bd211216f2 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp @@ -74,7 +74,7 @@ void XtensaInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void XtensaInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void XtensaInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { O << getRegisterName(Reg); } diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h index 46a35ae6f4e3fad..4122b1ff2310b71 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h @@ -36,7 +36,7 @@ class XtensaInstPrinter : public MCInstPrinter { static void printOperand(const MCOperand &MO, raw_ostream &O); // Override MCInstPrinter. 
- void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; From 9e8219a78c80442fb0f795f17926595a94a8e7d7 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 29 Oct 2024 19:41:49 -0700 Subject: [PATCH 378/425] IR: Fix verifier missing addrspace mismatch in vector GEPs (#114091) --- llvm/lib/IR/Verifier.cpp | 11 +++++------ llvm/unittests/IR/VerifierTest.cpp | 30 ++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ee807ca13787d5e..ffcab98db9aa026 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4121,8 +4121,9 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); Check(ElTy, "Invalid indices for GEP pointer type!", &GEP); - Check(GEP.getType()->isPtrOrPtrVectorTy() && - GEP.getResultElementType() == ElTy, + PointerType *PtrTy = dyn_cast(GEP.getType()->getScalarType()); + + Check(PtrTy && GEP.getResultElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); if (auto *GEPVTy = dyn_cast(GEP.getType())) { @@ -4144,10 +4145,8 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - if (auto *PTy = dyn_cast(GEP.getType())) { - Check(GEP.getAddressSpace() == PTy->getAddressSpace(), - "GEP address space doesn't match type", &GEP); - } + Check(GEP.getAddressSpace() == PtrTy->getAddressSpace(), + "GEP address space doesn't match type", &GEP); visitInstruction(GEP); } diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index 91cd35a10e9b926..462578a34da837d 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -385,5 +385,35 @@ TEST(VerifierTest, AtomicRMW) { << Error; } 
+TEST(VerifierTest, GetElementPtrInst) { + LLVMContext C; + Module M("M", C); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false); + Function *F = Function::Create(FTy, Function::ExternalLinkage, "foo", M); + BasicBlock *Entry = BasicBlock::Create(C, "entry", F); + ReturnInst *RI = ReturnInst::Create(C, Entry); + + FixedVectorType *V2P1Ty = FixedVectorType::get(PointerType::get(C, 1), 2); + FixedVectorType *V2P2Ty = FixedVectorType::get(PointerType::get(C, 2), 2); + + Instruction *GEPVec = GetElementPtrInst::Create( + Type::getInt8Ty(C), ConstantAggregateZero::get(V2P1Ty), + {ConstantVector::getSplat(ElementCount::getFixed(2), + ConstantInt::get(Type::getInt64Ty(C), 0))}, + Entry); + + GEPVec->insertBefore(RI); + + // Break the address space of the source value + GEPVec->getOperandUse(0).set(ConstantAggregateZero::get(V2P2Ty)); + + std::string Error; + raw_string_ostream ErrorOS(Error); + EXPECT_TRUE(verifyFunction(*F, &ErrorOS)); + EXPECT_TRUE( + StringRef(Error).starts_with("GEP address space doesn't match type")) + << Error; +} + } // end anonymous namespace } // end namespace llvm From c62130f7b35412e7caadf5fd9547f21a736c4543 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Oct 2024 19:42:35 -0700 Subject: [PATCH 379/425] [RISCV] Add OperandType to loadfpimm. (#114150) This is represented in the MachineInstr and MCInst as a 5-bit unsigned immediate so we use OPERAND_UIMM5. If someone needs to know for sure its an FLI constant in the future we can break it out to a new type. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 2bdcfd21270e906..f13b3e69f84f899 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -35,6 +35,8 @@ def LoadFPImmOperand : AsmOperandClass { def loadfpimm : Operand { let ParserMatchClass = LoadFPImmOperand; let PrintMethod = "printFPImmOperand"; + let OperandType = "OPERAND_UIMM5"; + let OperandNamespace = "RISCVOp"; } def RTZArg : AsmOperandClass { From 2c313259c65317f097d57ab4c6684b25db98f2e4 Mon Sep 17 00:00:00 2001 From: lialan Date: Tue, 29 Oct 2024 23:04:48 -0400 Subject: [PATCH 380/425] [MLIR] VectorEmulateNarrowType to support loading of unaligned vectors (#113411) Previously, the pass only supported emulation of loading vector sizes that are multiples of the emulated data type. This patch expands its support for emulating sizes that are not multiples of byte sizes. In such cases, the element values are packed back-to-back to preserve memory space. To give a concrete example: if an input has type `memref<3x3xi2>`, it is actually occupying 3 bytes in memory, with the first 18 bits storing the values and the last 6 bits as padding. The slice of `vector<3xi2>` at index `[2, 0]` is stored in memory from bit 12 to bit 18. To properly load the elements from bit 12 to bit 18 from memory, first load byte 2 and byte 3, and convert it to a vector of `i2` type; then extract bits 4 to 10 (element index 2-5) to form a `vector<3xi2>`. A limitation of this patch is that the linearized index of the unaligned vector has to be known at compile time. Extra code needs to be emitted to handle it if the condition does not hold. 
The following ops are updated: * `vector::LoadOp` * `vector::TransferReadOp` * `vector::MaskedLoadOp` --- .../mlir/Dialect/MemRef/Utils/MemRefUtils.h | 8 +- mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp | 9 +- .../Transforms/VectorEmulateNarrowType.cpp | 235 ++++++++++++++---- .../vector-emulate-narrow-type-unaligned.mlir | 67 +++++ 4 files changed, 264 insertions(+), 55 deletions(-) create mode 100644 mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir diff --git a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h index ca3326dbbef5191..a761a77a407e879 100644 --- a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h +++ b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h @@ -32,7 +32,8 @@ namespace memref { bool isStaticShapeAndContiguousRowMajor(MemRefType type); /// For a `memref` with `offset`, `sizes` and `strides`, returns the -/// offset and size to use for the linearized `memref`. +/// offset, size, and potentially the size padded at the front to use for the +/// linearized `memref`. /// - If the linearization is done for emulating load/stores of /// element type with bitwidth `srcBits` using element type with /// bitwidth `dstBits`, the linearized offset and size are @@ -42,9 +43,14 @@ bool isStaticShapeAndContiguousRowMajor(MemRefType type); /// index to use in the linearized `memref`. The linearized index /// is also scaled down by `dstBits`/`srcBits`. If `indices` is not provided /// 0, is returned for the linearized index. +/// - If the size of the load/store is smaller than the linearized memref +/// load/store, the memory region emulated is larger than the actual memory +/// region needed. `intraDataOffset` returns the element offset of the data +/// relevant at the beginning. 
struct LinearizedMemRefInfo { OpFoldResult linearizedOffset; OpFoldResult linearizedSize; + OpFoldResult intraDataOffset; }; std::pair getLinearizedMemRefOffsetAndSize( OpBuilder &builder, Location loc, int srcBits, int dstBits, diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp index 7321b19068016cc..6de744a7f752448 100644 --- a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp +++ b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp @@ -81,11 +81,10 @@ std::pair getLinearizedMemRefOffsetAndSize( // Adjust linearizedIndices and size by the scale factor (dstBits / srcBits). int64_t scaler = dstBits / srcBits; - addMulMap = addMulMap.floorDiv(scaler); mulMap = mulMap.floorDiv(scaler); OpFoldResult linearizedIndices = affine::makeComposedFoldedAffineApply( - builder, loc, addMulMap, offsetValues); + builder, loc, addMulMap.floorDiv(scaler), offsetValues); OpFoldResult linearizedSize = affine::makeComposedFoldedAffineApply(builder, loc, mulMap, sizes); @@ -95,7 +94,11 @@ std::pair getLinearizedMemRefOffsetAndSize( OpFoldResult adjustBaseOffset = affine::makeComposedFoldedAffineApply( builder, loc, s0.floorDiv(scaler), {offset}); - return {{adjustBaseOffset, linearizedSize}, linearizedIndices}; + OpFoldResult intraVectorOffset = affine::makeComposedFoldedAffineApply( + builder, loc, addMulMap % scaler, offsetValues); + + return {{adjustBaseOffset, linearizedSize, intraVectorOffset}, + linearizedIndices}; } LinearizedMemRefInfo diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp index 66362d3ca70fb64..1d6f8a991d9b5b7 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" +#include 
"mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" #include "mlir/IR/BuiltinAttributes.h" @@ -22,8 +23,10 @@ #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace mlir; @@ -33,17 +36,22 @@ using namespace mlir; #define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n") /// Returns a compressed mask. The mask value is set only if any mask is present -/// in the scale range. E.g., if `scale` equals to 2, the following mask: +/// in the scale range. E.g., if `scale` equals to 2, and `intraDataOffset` +/// equals to 2, the following mask: /// /// %mask = [1, 1, 1, 0, 0, 0] /// -/// will return the following new compressed mask: +/// will first be padded with number of `intraDataOffset` zeros: +/// %mask = [0, 0, 1, 1, 1, 0, 0, 0] /// -/// %mask = [1, 1, 0] +/// then it will return the following new compressed mask: +/// +/// %mask = [0, 1, 1, 0] static FailureOr getCompressedMaskOp(OpBuilder &rewriter, Location loc, Value mask, - int origElements, int scale) { - auto numElements = (origElements + scale - 1) / scale; + int origElements, int scale, + int intraDataOffset = 0) { + auto numElements = (intraDataOffset + origElements + scale - 1) / scale; Operation *maskOp = mask.getDefiningOp(); SmallVector extractOps; @@ -67,6 +75,9 @@ static FailureOr getCompressedMaskOp(OpBuilder &rewriter, shape.back() = numElements; auto newMaskType = VectorType::get(shape, rewriter.getI1Type()); if (createMaskOp) { + // TODO: handle the case with non-zero intraDataOffset for CreateMaskOp. 
+ if (intraDataOffset != 0) + return failure(); OperandRange maskOperands = createMaskOp.getOperands(); size_t numMaskOperands = maskOperands.size(); AffineExpr s0; @@ -86,11 +97,27 @@ static FailureOr getCompressedMaskOp(OpBuilder &rewriter, ArrayRef maskDimSizes = constantMaskOp.getMaskDimSizes(); size_t numMaskOperands = maskDimSizes.size(); int64_t origIndex = maskDimSizes[numMaskOperands - 1]; - int64_t maskIndex = (origIndex + scale - 1) / scale; + int64_t startIndex = intraDataOffset / scale; + int64_t maskIndex = llvm::divideCeil(intraDataOffset + origIndex, scale); + + // TODO: we only want the mask between [startIndex, maskIndex] to be true, + // the rest are false. + if (intraDataOffset != 0 && maskDimSizes.size() > 1) + return failure(); + SmallVector newMaskDimSizes(maskDimSizes.drop_back()); newMaskDimSizes.push_back(maskIndex); - newMask = rewriter.create(loc, newMaskType, - newMaskDimSizes); + + if (intraDataOffset == 0) { + newMask = rewriter.create(loc, newMaskType, + newMaskDimSizes); + } else { + SmallVector newMaskValues; + for (int64_t i = 0; i < numElements; ++i) + newMaskValues.push_back(i >= startIndex && i < maskIndex); + auto denseAttr = DenseElementsAttr::get(newMaskType, newMaskValues); + newMask = rewriter.create(loc, newMaskType, denseAttr); + } } while (!extractOps.empty()) { @@ -102,6 +129,26 @@ static FailureOr getCompressedMaskOp(OpBuilder &rewriter, return newMask; } +static Value extractSubvectorFrom(RewriterBase &rewriter, Location loc, + VectorType extractType, Value vector, + int64_t frontOffset, int64_t subvecSize) { + auto offsets = rewriter.getI64ArrayAttr({frontOffset}); + auto sizes = rewriter.getI64ArrayAttr({subvecSize}); + auto strides = rewriter.getI64ArrayAttr({1}); + return rewriter + .create(loc, extractType, vector, offsets, + sizes, strides) + ->getResult(0); +} + +static Value insertSubvectorInto(RewriterBase &rewriter, Location loc, + Value src, Value dest, int64_t offset) { + auto offsets = 
rewriter.getI64ArrayAttr({offset}); + auto strides = rewriter.getI64ArrayAttr({1}); + return rewriter.create(loc, dest.getType(), src, + dest, offsets, strides); +} + namespace { //===----------------------------------------------------------------------===// @@ -201,7 +248,8 @@ struct ConvertVectorMaskedStore final auto stridedMetadata = rewriter.create(loc, op.getBase()); OpFoldResult linearizedIndicesOfr; - std::tie(std::ignore, linearizedIndicesOfr) = + memref::LinearizedMemRefInfo linearizedInfo; + std::tie(linearizedInfo, linearizedIndicesOfr) = memref::getLinearizedMemRefOffsetAndSize( rewriter, loc, srcBits, dstBits, stridedMetadata.getConstifiedMixedOffset(), @@ -214,19 +262,19 @@ struct ConvertVectorMaskedStore final // Load the whole data and use arith.select to handle the corner cases. // E.g., given these input values: // - // %mask = [1, 1, 1, 0, 0, 0] - // %0[%c0, %c0] contains [0x1, 0x2, 0x3, 0x4, 0x5, 0x6] - // %value_to_store = [0x7, 0x8, 0x9, 0xA, 0xB, 0xC] + // %mask = [0, 1, 1, 1, 1, 1, 0, 0] + // %0[%c0, %c0] contains [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8] + // %value_to_store = [0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0] // // we'll have // - // expected output: [0x7, 0x8, 0x9, 0x4, 0x5, 0x6] + // expected output: [0x1, 0xA, 0xB, 0xC, 0xD, 0xE, 0x7, 0x8] // - // %new_mask = [1, 1, 0] - // %maskedload = [0x12, 0x34, 0x0] - // %bitcast = [0x1, 0x2, 0x3, 0x4, 0x0, 0x0] - // %select_using_original_mask = [0x7, 0x8, 0x9, 0x4, 0x0, 0x0] - // %packed_data = [0x78, 0x94, 0x00] + // %new_mask = [1, 1, 1, 0] + // %maskedload = [0x12, 0x34, 0x56, 0x00] + // %bitcast = [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x0, 0x0] + // %select_using_shifted_mask = [0x1, 0xA, 0xB, 0xC, 0xD, 0xE, 0x0, 0x0] + // %packed_data = [0x1A, 0xBC, 0xDE, 0x00] // // Using the new mask to store %packed_data results in expected output. 
FailureOr newMask = @@ -243,8 +291,9 @@ struct ConvertVectorMaskedStore final loc, newType, adaptor.getBase(), linearizedIndices, newMask.value()->getResult(0), passThru); - Value valueToStore = rewriter.create( - loc, op.getValueToStore().getType(), newLoad); + auto newBitCastType = VectorType::get(numElements * scale, oldElementType); + Value valueToStore = + rewriter.create(loc, newBitCastType, newLoad); valueToStore = rewriter.create( loc, op.getMask(), op.getValueToStore(), valueToStore); valueToStore = @@ -294,19 +343,31 @@ struct ConvertVectorLoad final : OpConversionPattern { // %1 = vector.load %0[%linear_index] : memref<6xi8>, vector<2xi8> // %2 = vector.bitcast %1 : vector<2xi8> to vector<4xi4> // - // TODO: Currently, only the even number of elements loading is supported. - // To deal with the odd number of elements, one has to extract the - // subvector at the proper offset after bit-casting. + // There are cases where the number of elements to load is not byte-aligned, + // for example: + // + // %1 = vector.load %0[%c1, %c0] : memref<3x3xi2>, vector<3xi2> + // + // we will have to load extra bytes and extract the exact slice in between. + // + // %1 = vector.load %0[%c2] : memref<3xi8>, vector<2xi8> + // %2 = vector.bitcast %1 : vector<2xi8> to vector<8xi2> + // %3 = vector.extract_strided_slice %2 {offsets = [2], sizes = [3], strides + // = [1]} + // : vector<8xi2> to vector<3xi2> + // + // TODO: Currently the extract_strided_slice's attributes must be known at + // compile time as they must be constants. 
auto origElements = op.getVectorType().getNumElements(); - if (origElements % scale != 0) - return failure(); + bool isUnalignedEmulation = origElements % scale != 0; auto stridedMetadata = rewriter.create(loc, op.getBase()); OpFoldResult linearizedIndices; - std::tie(std::ignore, linearizedIndices) = + memref::LinearizedMemRefInfo linearizedInfo; + std::tie(linearizedInfo, linearizedIndices) = memref::getLinearizedMemRefOffsetAndSize( rewriter, loc, srcBits, dstBits, stridedMetadata.getConstifiedMixedOffset(), @@ -314,15 +375,31 @@ struct ConvertVectorLoad final : OpConversionPattern { stridedMetadata.getConstifiedMixedStrides(), getAsOpFoldResult(adaptor.getIndices())); - auto numElements = (origElements + scale - 1) / scale; + std::optional foldedIntraVectorOffset = + isUnalignedEmulation + ? getConstantIntValue(linearizedInfo.intraDataOffset) + : 0; + + if (!foldedIntraVectorOffset) { + // unimplemented case for dynamic intra vector offset + return failure(); + } + + auto numElements = + llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale); auto newLoad = rewriter.create( loc, VectorType::get(numElements, newElementType), adaptor.getBase(), getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices)); - auto bitCast = - rewriter.create(loc, op.getType(), newLoad); + Value result = rewriter.create( + loc, VectorType::get(numElements * scale, oldElementType), newLoad); - rewriter.replaceOp(op, bitCast->getResult(0)); + if (isUnalignedEmulation) { + result = extractSubvectorFrom(rewriter, loc, op.getType(), result, + *foldedIntraVectorOffset, origElements); + } + + rewriter.replaceOp(op, result); return success(); } }; @@ -396,13 +473,13 @@ struct ConvertVectorMaskedLoad final // subvector at the proper offset after bit-casting. 
auto origType = op.getVectorType(); auto origElements = origType.getNumElements(); - if (origElements % scale != 0) - return failure(); + bool isUnalignedEmulation = origElements % scale != 0; auto stridedMetadata = rewriter.create(loc, op.getBase()); OpFoldResult linearizedIndices; - std::tie(std::ignore, linearizedIndices) = + memref::LinearizedMemRefInfo linearizedInfo; + std::tie(linearizedInfo, linearizedIndices) = memref::getLinearizedMemRefOffsetAndSize( rewriter, loc, srcBits, dstBits, stridedMetadata.getConstifiedMixedOffset(), @@ -410,29 +487,68 @@ struct ConvertVectorMaskedLoad final stridedMetadata.getConstifiedMixedStrides(), getAsOpFoldResult(adaptor.getIndices())); + std::optional foldedIntraVectorOffset = + isUnalignedEmulation + ? getConstantIntValue(linearizedInfo.intraDataOffset) + : 0; + + if (!foldedIntraVectorOffset) { + // unimplemented case for dynamic intra vector offset + return failure(); + } + FailureOr newMask = - getCompressedMaskOp(rewriter, loc, op.getMask(), origElements, scale); + getCompressedMaskOp(rewriter, loc, op.getMask(), origElements, scale, + *foldedIntraVectorOffset); if (failed(newMask)) return failure(); - auto numElements = (origElements + scale - 1) / scale; - auto newType = VectorType::get(numElements, newElementType); + auto numElements = + llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale); + auto loadType = VectorType::get(numElements, newElementType); + auto newBitcastType = VectorType::get(numElements * scale, oldElementType); + + Value passthru = op.getPassThru(); + if (isUnalignedEmulation) { + // create an empty vector of the new type + auto emptyVector = rewriter.create( + loc, newBitcastType, rewriter.getZeroAttr(newBitcastType)); + passthru = insertSubvectorInto(rewriter, loc, passthru, emptyVector, + *foldedIntraVectorOffset); + } auto newPassThru = - rewriter.create(loc, newType, op.getPassThru()); + rewriter.create(loc, loadType, passthru); // Generating the new masked load. 
auto newLoad = rewriter.create( - loc, newType, adaptor.getBase(), + loc, loadType, adaptor.getBase(), getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices), newMask.value()->getResult(0), newPassThru); // Setting the part that originally was not effectively loaded from memory // to pass through. auto bitCast = - rewriter.create(loc, op.getType(), newLoad); - auto select = rewriter.create(loc, op.getMask(), bitCast, - op.getPassThru()); - rewriter.replaceOp(op, select->getResult(0)); + rewriter.create(loc, newBitcastType, newLoad); + + Value mask = op.getMask(); + if (isUnalignedEmulation) { + auto newSelectMaskType = + VectorType::get(numElements * scale, rewriter.getI1Type()); + // TODO: can fold if op's mask is constant + auto emptyVector = rewriter.create( + loc, newSelectMaskType, rewriter.getZeroAttr(newSelectMaskType)); + mask = insertSubvectorInto(rewriter, loc, op.getMask(), emptyVector, + *foldedIntraVectorOffset); + } + + Value result = + rewriter.create(loc, mask, bitCast, passthru); + + if (isUnalignedEmulation) { + result = extractSubvectorFrom(rewriter, loc, op.getType(), result, + *foldedIntraVectorOffset, origElements); + } + rewriter.replaceOp(op, result); return success(); } @@ -464,8 +580,8 @@ struct ConvertVectorTransferRead final int scale = dstBits / srcBits; auto origElements = op.getVectorType().getNumElements(); - if (origElements % scale != 0) - return failure(); + + bool isUnalignedEmulation = origElements % scale != 0; auto newPadding = rewriter.create(loc, newElementType, adaptor.getPadding()); @@ -474,7 +590,8 @@ struct ConvertVectorTransferRead final rewriter.create(loc, op.getSource()); OpFoldResult linearizedIndices; - std::tie(std::ignore, linearizedIndices) = + memref::LinearizedMemRefInfo linearizedInfo; + std::tie(linearizedInfo, linearizedIndices) = memref::getLinearizedMemRefOffsetAndSize( rewriter, loc, srcBits, dstBits, stridedMetadata.getConstifiedMixedOffset(), @@ -482,18 +599,34 @@ struct 
ConvertVectorTransferRead final stridedMetadata.getConstifiedMixedStrides(), getAsOpFoldResult(adaptor.getIndices())); - auto numElements = (origElements + scale - 1) / scale; - auto newReadType = VectorType::get(numElements, newElementType); + std::optional foldedIntraVectorOffset = + isUnalignedEmulation + ? getConstantIntValue(linearizedInfo.intraDataOffset) + : 0; + + if (!foldedIntraVectorOffset) { + // unimplemented case for dynamic intra-vector offset + return failure(); + } + + auto numElements = + llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale); auto newRead = rewriter.create( - loc, newReadType, adaptor.getSource(), + loc, VectorType::get(numElements, newElementType), adaptor.getSource(), getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices), newPadding); - auto bitCast = - rewriter.create(loc, op.getType(), newRead); + auto bitCast = rewriter.create( + loc, VectorType::get(numElements * scale, oldElementType), newRead); + + Value result = bitCast->getResult(0); + if (isUnalignedEmulation) { + result = extractSubvectorFrom(rewriter, loc, op.getType(), result, + *foldedIntraVectorOffset, origElements); + } + rewriter.replaceOp(op, result); - rewriter.replaceOp(op, bitCast->getResult(0)); return success(); } }; diff --git a/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir b/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir new file mode 100644 index 000000000000000..7ecbad7968225d0 --- /dev/null +++ b/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir @@ -0,0 +1,67 @@ +// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8" --cse --split-input-file %s | FileCheck %s + +func.func @vector_load_i2(%arg1: index, %arg2: index) -> vector<3x3xi2> { + %0 = memref.alloc() : memref<3x3xi2> + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %cst = arith.constant dense<0> : vector<3x3xi2> + %1 = vector.load %0[%c2, %c0] : memref<3x3xi2>, 
vector<3xi2> + %2 = vector.insert %1, %cst [0] : vector<3xi2> into vector<3x3xi2> + return %2 : vector<3x3xi2> +} + +// CHECK: func @vector_load_i2 +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8> +// CHECK: %[[INDEX:.+]] = arith.constant 1 : index +// CHECK: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<3xi8>, vector<2xi8> +// CHECK: %[[VEC_I2:.+]] = vector.bitcast %[[VEC]] : vector<2xi8> to vector<8xi2> +// CHECK: %[[EXCTRACT:.+]] = vector.extract_strided_slice %[[VEC_I2]] {offsets = [2], sizes = [3], strides = [1]} : vector<8xi2> to vector<3xi2> + +//----- + +func.func @vector_transfer_read_i2() -> vector<3xi2> { + %0 = memref.alloc() : memref<3x3xi2> + %c0i2 = arith.constant 0 : i2 + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %1 = vector.transfer_read %0[%c2, %c0], %c0i2 {in_bounds = [true]} : memref<3x3xi2>, vector<3xi2> + return %1 : vector<3xi2> +} + +// CHECK: func @vector_transfer_read_i2 +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8> +// CHECK: %[[INDEX:.+]] = arith.constant 1 : index +// CHECK: %[[READ:.+]] = vector.transfer_read %[[ALLOC]][%[[INDEX]]], %0 : memref<3xi8>, vector<2xi8> +// CHECK: %[[BITCAST:.+]] = vector.bitcast %[[READ]] : vector<2xi8> to vector<8xi2> +// CHECK: vector.extract_strided_slice %[[BITCAST]] {offsets = [2], sizes = [3], strides = [1]} : vector<8xi2> to vector<3xi2> + +//----- + +func.func @vector_cst_maskedload_i2(%passthru: vector<5xi2>) -> vector<3x5xi2> { + %0 = memref.alloc() : memref<3x5xi2> + %cst = arith.constant dense<0> : vector<3x5xi2> + %mask = vector.constant_mask [3] : vector<5xi1> + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %1 = vector.maskedload %0[%c2, %c0], %mask, %passthru : + memref<3x5xi2>, vector<5xi1>, vector<5xi2> into vector<5xi2> + %2 = vector.insert %1, %cst [0] : vector<5xi2> into vector<3x5xi2> + return %2 : vector<3x5xi2> +} + +// CHECK: func @vector_cst_maskedload_i2 +// CHECK: %[[ORIGINMASK:.+]] = vector.constant_mask 
[3] : vector<5xi1> +// CHECK: %[[NEWMASK:.+]] = arith.constant dense : vector<2xi1> +// CHECK: %[[VESSEL:.+]] = arith.constant dense<0> : vector<8xi2> +// CHECK: %[[INSERT1:.+]] = vector.insert_strided_slice %arg0, %[[VESSEL]] +// CHECK-SAME: {offsets = [2], strides = [1]} : vector<5xi2> into vector<8xi2> +// CHECK: %[[BITCAST1:.+]] = vector.bitcast %[[INSERT1]] : vector<8xi2> to vector<2xi8> +// CHECK: %[[C2:.+]] = arith.constant 2 : index +// CHECK: %[[MASKEDLOAD:.+]] = vector.maskedload %alloc[%[[C2]]], %[[NEWMASK:.+]], %[[BITCAST1]] +// CHECK-SAME: : memref<4xi8>, vector<2xi1>, vector<2xi8> into vector<2xi8> +// CHECK: %[[BITCAST2:.+]] = vector.bitcast %[[MASKEDLOAD]] : vector<2xi8> to vector<8xi2> +// CHECK: %[[CST2:.+]] = arith.constant dense : vector<8xi1> +// CHECK: %[[INSERT2:.+]] = vector.insert_strided_slice %[[ORIGINMASK]], %[[CST2]] +// CHECK-SAME: {offsets = [2], strides = [1]} : vector<5xi1> into vector<8xi1> +// CHECK: %[[SELECT:.+]] = arith.select %[[INSERT2]], %[[BITCAST2]], %[[INSERT1]] : vector<8xi1>, vector<8xi2> +// CHECK: vector.extract_strided_slice %[[SELECT]] {offsets = [2], sizes = [5], strides = [1]} : vector<8xi2> to vector<5xi2> From 29bff4aad8eb7f54f99e0496b735aee193063b04 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 29 Oct 2024 20:06:56 -0700 Subject: [PATCH 381/425] [llvm-objdump] Fix coloring with nested WithMarkup WithMarkup objects may nest, resulting in the `)` in `leaq (%rdx,%rax), %rbx` to be green instead of the default color, mismatching the color of `(`. ``` % llvm-mc -triple=x86_64 -mdis <<< '0x48 0x8d 0x1c 0x02' .text leaq ,)>, ``` To ensure that `(` and `)` get the same color, maintain a color stack within MCInstPrinter. 
Fix #99661 Pull Request: https://github.com/llvm/llvm-project/pull/113834 --- llvm/include/llvm/MC/MCInstPrinter.h | 10 ++++--- llvm/lib/MC/MCInstPrinter.cpp | 26 ++++++++++++------- .../llvm-objdump/X86/disassemble-color.s | 21 +++++++++++++++ 3 files changed, 44 insertions(+), 13 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/X86/disassemble-color.s diff --git a/llvm/include/llvm/MC/MCInstPrinter.h b/llvm/include/llvm/MC/MCInstPrinter.h index 0b9c738a7a0a319..e825c04a6dba6f9 100644 --- a/llvm/include/llvm/MC/MCInstPrinter.h +++ b/llvm/include/llvm/MC/MCInstPrinter.h @@ -9,8 +9,10 @@ #ifndef LLVM_MC_MCINSTPRINTER_H #define LLVM_MC_MCINSTPRINTER_H +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" #include namespace llvm { @@ -24,7 +26,6 @@ class MCRegister; class MCRegisterInfo; class MCSubtargetInfo; class StringRef; -class raw_ostream; /// Convert `Bytes' to a hex string and output to `OS' void dumpBytes(ArrayRef Bytes, raw_ostream &OS); @@ -76,6 +77,8 @@ class MCInstPrinter { /// If true, symbolize branch target and memory reference operands. bool SymbolizeOperands = false; + SmallVector ColorStack{raw_ostream::Colors::RESET}; + /// Utility function for printing annotations. 
void printAnnotation(raw_ostream &OS, StringRef Annot); @@ -98,8 +101,8 @@ class MCInstPrinter { class WithMarkup { public: - LLVM_CTOR_NODISCARD WithMarkup(raw_ostream &OS, Markup M, bool EnableMarkup, - bool EnableColor); + LLVM_CTOR_NODISCARD WithMarkup(MCInstPrinter &IP, raw_ostream &OS, Markup M, + bool EnableMarkup, bool EnableColor); ~WithMarkup(); template WithMarkup &operator<<(T &O) { @@ -113,6 +116,7 @@ class MCInstPrinter { } private: + MCInstPrinter &IP; raw_ostream &OS; bool EnableMarkup; bool EnableColor; diff --git a/llvm/lib/MC/MCInstPrinter.cpp b/llvm/lib/MC/MCInstPrinter.cpp index 488e34a6d53954d..069716a3ecf9b70 100644 --- a/llvm/lib/MC/MCInstPrinter.cpp +++ b/llvm/lib/MC/MCInstPrinter.cpp @@ -225,27 +225,31 @@ format_object MCInstPrinter::formatHex(uint64_t Value) const { } MCInstPrinter::WithMarkup MCInstPrinter::markup(raw_ostream &OS, Markup S) { - return WithMarkup(OS, S, getUseMarkup(), getUseColor()); + return WithMarkup(*this, OS, S, getUseMarkup(), getUseColor()); } -MCInstPrinter::WithMarkup::WithMarkup(raw_ostream &OS, Markup M, - bool EnableMarkup, bool EnableColor) - : OS(OS), EnableMarkup(EnableMarkup), EnableColor(EnableColor) { +MCInstPrinter::WithMarkup::WithMarkup(MCInstPrinter &IP, raw_ostream &OS, + Markup M, bool EnableMarkup, + bool EnableColor) + : IP(IP), OS(OS), EnableMarkup(EnableMarkup), EnableColor(EnableColor) { if (EnableColor) { + raw_ostream::Colors Color = raw_ostream::Colors::RESET; switch (M) { case Markup::Immediate: - OS.changeColor(raw_ostream::RED); + Color = raw_ostream::RED; break; case Markup::Register: - OS.changeColor(raw_ostream::CYAN); + Color = raw_ostream::CYAN; break; case Markup::Target: - OS.changeColor(raw_ostream::YELLOW); + Color = raw_ostream::YELLOW; break; case Markup::Memory: - OS.changeColor(raw_ostream::GREEN); + Color = raw_ostream::GREEN; break; } + IP.ColorStack.push_back(Color); + OS.changeColor(Color); } if (EnableMarkup) { @@ -269,6 +273,8 @@ 
MCInstPrinter::WithMarkup::WithMarkup(raw_ostream &OS, Markup M, MCInstPrinter::WithMarkup::~WithMarkup() { if (EnableMarkup) OS << '>'; - if (EnableColor) - OS.resetColor(); + if (!EnableColor) + return; + IP.ColorStack.pop_back(); + OS << IP.ColorStack.back(); } diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-color.s b/llvm/test/tools/llvm-objdump/X86/disassemble-color.s new file mode 100644 index 000000000000000..4e1d82562fb546a --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-color.s @@ -0,0 +1,21 @@ +# UNSUPPORTED: system-windows +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-objdump -d --no-show-raw-insn --disassembler-color=on %t | FileCheck %s --check-prefix=ATT +# RUN: llvm-objdump -d --no-show-raw-insn --disassembler-color=on -M intel %t | FileCheck %s --check-prefix=INTEL + +# ATT: <.text>: +# ATT-NEXT: leaq (%rdx,%rax,4), %rbx +# ATT-NEXT: movq (,%rax), %rbx +# ATT-NEXT: leaq 0x3(%rdx,%rax), %rbx +# ATT-NEXT: movq $0x3, %rax + +# INTEL: <.text>: +# INTEL-NEXT: lea rbx, [rdx + 4*rax] +# INTEL-NEXT: mov rbx, qword ptr [1*rax] +# INTEL-NEXT: lea rbx, [rdx + rax + 0x3] +# INTEL-NEXT: mov rax, 0x3 + +leaq (%rdx,%rax,4), %rbx +movq (,%rax), %rbx +leaq 3(%rdx,%rax), %rbx +movq $3, %rax From 3c02fea737d774bbf174c6b763593ad3e7f56221 Mon Sep 17 00:00:00 2001 From: Piotr Fusik Date: Wed, 30 Oct 2024 04:07:14 +0100 Subject: [PATCH 382/425] [LV][NFC] Remove stray semicolons (#114057) --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 778d928252e0519..150fc4a42b4847f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -581,8 +581,8 @@ class InnerLoopVectorizer { /// Allow subclasses to override and print debug traces before/after vplan /// execution, when trace information is 
requested. - virtual void printDebugTracesAtStart(){}; - virtual void printDebugTracesAtEnd(){}; + virtual void printDebugTracesAtStart() {} + virtual void printDebugTracesAtEnd() {} /// The original loop. Loop *OrigLoop; @@ -1310,7 +1310,7 @@ class LoopVectorizationCostModel { return false; case cl::BOU_FALSE: return true; - }; + } llvm_unreachable("impossible case value"); } @@ -9065,7 +9065,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { if (!getDecisionAndClampRange(ApplyIG, Range)) continue; InterleaveGroups.insert(IG); - }; + } // --------------------------------------------------------------------------- // Construct recipes for the instructions in the loop From ef455e6b16334128c008fc57a4d8ace701934e80 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Wed, 30 Oct 2024 03:12:23 +0000 Subject: [PATCH 383/425] [TableGen] Replace all lingering uses of getName with getEnumName The former is a wrapper for the latter with two differences: Other is mapped to "UNKNOWN" (rather than "MVT::Other"), and iPTR(Any) are mapped to "TLI.getPointerTy()" rather than "MVT::iPTR(Any)". The only uses are in FastISelMap::printFunctionDefinitions. Most of these uses are just a form of name mangling to ensure uniqueness, so the actual string isn't important (and, in the case of MVT::iPTR(Any), were both to be used, they would clash). Two uses are for a case statement, which requires the expression to be a constant (of the right type), but neither UNKNOWN nor TLI.getPointerTy() are constants, so would not work there. The remaining uses are where an expression is needed, so UNKNOWN similarly doesn't work, though TLI.getPointerTy() could in this case. However, neither iPTR nor iPTRAny are supposed to make it this far through TableGen, and should instead have been replaced with concrete types, so this case should not be hit. 
Moreover, for almost all of these uses, the name is passed to getLegalCName, which will strip an MVT:: prefix but will leave TLI.getPointerTy() unchanged, which is not a valid C identifier, nor component thereof. Thus, delete this unnecessary, and mostly-broken, wrapper and just use the underlying getEnumName. This has been verified to have no effect on the generated files for any in-tree target, including experimental ones. Reviewers: arsenm Reviewed By: arsenm Pull Request: https://github.com/llvm/llvm-project/pull/113731 --- llvm/utils/TableGen/Common/CodeGenTarget.cpp | 13 ------------ llvm/utils/TableGen/Common/CodeGenTarget.h | 1 - llvm/utils/TableGen/FastISelEmitter.cpp | 21 ++++++++++---------- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.cpp b/llvm/utils/TableGen/Common/CodeGenTarget.cpp index b358518c4290b0f..4e75db689a0b57a 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/Common/CodeGenTarget.cpp @@ -47,19 +47,6 @@ MVT::SimpleValueType llvm::getValueType(const Record *Rec) { return (MVT::SimpleValueType)Rec->getValueAsInt("Value"); } -StringRef llvm::getName(MVT::SimpleValueType T) { - switch (T) { - case MVT::Other: - return "UNKNOWN"; - case MVT::iPTR: - return "TLI.getPointerTy()"; - case MVT::iPTRAny: - return "TLI.getPointerTy()"; - default: - return getEnumName(T); - } -} - StringRef llvm::getEnumName(MVT::SimpleValueType T) { // clang-format off switch (T) { diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.h b/llvm/utils/TableGen/Common/CodeGenTarget.h index c7b44f7028eb5bb..8bcb2f677a00b0b 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.h +++ b/llvm/utils/TableGen/Common/CodeGenTarget.h @@ -46,7 +46,6 @@ class CodeGenSubRegIndex; /// record corresponds to. 
MVT::SimpleValueType getValueType(const Record *Rec); -StringRef getName(MVT::SimpleValueType T); StringRef getEnumName(MVT::SimpleValueType T); /// getQualifiedName - Return the name of the specified record, with a diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp index 17198c85f060091..2052222cae5e5f2 100644 --- a/llvm/utils/TableGen/FastISelEmitter.cpp +++ b/llvm/utils/TableGen/FastISelEmitter.cpp @@ -718,19 +718,20 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { const PredMap &PM = RI.second; OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(std::string(getName(VT))) << "_" - << getLegalCName(std::string(getName(RetVT))) << "_"; + << getLegalCName(std::string(getEnumName(VT))) << "_" + << getLegalCName(std::string(getEnumName(RetVT))) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "("; Operands.PrintParameters(OS); OS << ") {\n"; - emitInstructionCode(OS, Operands, PM, std::string(getName(RetVT))); + emitInstructionCode(OS, Operands, PM, + std::string(getEnumName(RetVT))); } // Emit one function for the type that demultiplexes on return type. 
OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(std::string(getName(VT))) << "_"; + << getLegalCName(std::string(getEnumName(VT))) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "(MVT RetVT"; if (!Operands.empty()) @@ -739,10 +740,10 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { OS << ") {\nswitch (RetVT.SimpleTy) {\n"; for (const auto &RI : RM) { MVT::SimpleValueType RetVT = RI.first; - OS << " case " << getName(RetVT) << ": return fastEmit_" + OS << " case " << getEnumName(RetVT) << ": return fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(std::string(getName(VT))) << "_" - << getLegalCName(std::string(getName(RetVT))) << "_"; + << getLegalCName(std::string(getEnumName(VT))) << "_" + << getLegalCName(std::string(getEnumName(RetVT))) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "("; Operands.PrintArguments(OS); @@ -753,7 +754,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { } else { // Non-variadic return type. 
OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(std::string(getName(VT))) << "_"; + << getLegalCName(std::string(getEnumName(VT))) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "(MVT RetVT"; if (!Operands.empty()) @@ -761,7 +762,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { Operands.PrintParameters(OS); OS << ") {\n"; - OS << " if (RetVT.SimpleTy != " << getName(RM.begin()->first) + OS << " if (RetVT.SimpleTy != " << getEnumName(RM.begin()->first) << ")\n return 0;\n"; const PredMap &PM = RM.begin()->second; @@ -781,7 +782,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { OS << " switch (VT.SimpleTy) {\n"; for (const auto &TI : TM) { MVT::SimpleValueType VT = TI.first; - std::string TypeName = std::string(getName(VT)); + std::string TypeName = std::string(getEnumName(VT)); OS << " case " << TypeName << ": return fastEmit_" << getLegalCName(Opcode) << "_" << getLegalCName(TypeName) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); From e8b7f53fa4dc8a9f74a3d67dfb89eb68fcd78679 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Wed, 30 Oct 2024 03:19:53 +0000 Subject: [PATCH 384/425] [TableGen] Remove a pointless check for iPTRAny We've already called EnforceInteger on Types[0], and iPTRAny isn't regarded as an integer type (note that TableGen special-cases iPTR here to include that, though), so we cannot possibly still have an iPTRAny by this point. Delete the check, and let getFixedSizeInBits catch it along with all the other overloaded types if that ever becomes false. Also document why we have this check whilst here. 
Reviewers: arsenm Reviewed By: arsenm Pull Request: https://github.com/llvm/llvm-project/pull/113732 --- llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index d2228c902a56b47..3446bfeb3e7e19f 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -2461,7 +2461,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { ValueTypeByHwMode VVT = TP.getInfer().getConcrete(Types[0], false); for (auto &P : VVT) { MVT::SimpleValueType VT = P.second.SimpleTy; - if (VT == MVT::iPTR || VT == MVT::iPTRAny) + // Can only check for types of a known size + if (VT == MVT::iPTR) continue; unsigned Size = MVT(VT).getFixedSizeInBits(); // Make sure that the value is representable for this type. From 9467645547f99ba8fa8152d514f06e76e0be8585 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Wed, 30 Oct 2024 03:27:48 +0000 Subject: [PATCH 385/425] [CodeGen] Rename MVT::iPTRAny to MVT::pAny Whilst in upstream LLVM iPTRAny is only ever an integer, essentially an alias for iPTR, this is not true in CHERI LLVM, where it gets used to mean "iPTR or cPTR", i.e. either an integer address or a capability (with cPTR and cN being the capability equivalents of iPTR and iN). Moreover, iPTRAny is already not itself regarded as an integer (calling isInteger() will give false), so the "i" prefix is misleading, and it stands out as different from all the other xAny that have a single letter prefix denoting their type. Thus, rename it to pAny, reflecting that it is an overloaded pointer type, which could end up being specialised to an integer type, but does not have to be. 
This has been verified to have no effect on the generated files for LLVM itself or any in-tree target beyond the replacement of the identifier iPTRAny with pAny in GenVT.inc. Reviewers: arsenm Reviewed By: arsenm Pull Request: https://github.com/llvm/llvm-project/pull/113733 --- llvm/include/llvm/CodeGen/ValueTypes.h | 3 ++- llvm/include/llvm/CodeGen/ValueTypes.td | 4 ++-- llvm/include/llvm/CodeGenTypes/MachineValueType.h | 2 +- llvm/include/llvm/IR/Intrinsics.h | 2 +- llvm/include/llvm/IR/Intrinsics.td | 6 +++--- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2 +- llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp | 4 ++-- mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp | 2 +- 8 files changed, 13 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h index 3db6f33a8093f06..4de109739227ad2 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -230,7 +230,8 @@ namespace llvm { /// Return true if this is an overloaded type for TableGen. bool isOverloaded() const { - return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny); + return (V == MVT::iAny || V == MVT::fAny || V == MVT::vAny || + V == MVT::pAny); } /// Return true if the bit size is a multiple of 8. diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 493c0cfcab60ce4..6d6b92958b43218 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -338,9 +338,9 @@ def MetadataVT : ValueType<0, 505> { // Metadata let LLVMName = "Metadata"; } -// Pseudo valuetype mapped to the current pointer size to any address space. +// Pseudo valuetype to represent "pointer to any address space" // Should only be used in TableGen. -def iPTRAny : VTAny<506>; +def pAny : VTAny<506>; // Pseudo valuetype to represent "vector of any size" // Should only be used in TableGen. 
diff --git a/llvm/include/llvm/CodeGenTypes/MachineValueType.h b/llvm/include/llvm/CodeGenTypes/MachineValueType.h index c9a5098ef1623ed..5c47ad4824a7911 100644 --- a/llvm/include/llvm/CodeGenTypes/MachineValueType.h +++ b/llvm/include/llvm/CodeGenTypes/MachineValueType.h @@ -320,7 +320,7 @@ namespace llvm { llvm_unreachable("Value type is non-standard value, Other."); case iPTR: llvm_unreachable("Value type size is target-dependent. Ask TLI."); - case iPTRAny: + case pAny: case iAny: case fAny: case vAny: diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index e893295e3272b90..89dfff256e0c432 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -92,7 +92,7 @@ namespace Intrinsic { /// return the existing declaration. /// /// The \p Tys parameter is for intrinsics with overloaded types (e.g., those - /// using iAny, fAny, vAny, or iPTRAny). For a declaration of an overloaded + /// using iAny, fAny, vAny, or pAny). For a declaration of an overloaded /// intrinsic, Tys must provide exactly one type for each overloaded type in /// the intrinsic. Function *getOrInsertDeclaration(Module *M, ID id, ArrayRef Tys = {}); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e91758ed34eb389..8ed57f818d60062 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -388,7 +388,7 @@ class LLVMAnyType : LLVMType { !eq(vt, iAny) : ArgKind.AnyInteger, !eq(vt, fAny) : ArgKind.AnyFloat, !eq(vt, vAny) : ArgKind.AnyVector, - !eq(vt, iPTRAny) : ArgKind.AnyPointer, + !eq(vt, pAny) : ArgKind.AnyPointer, ); let Sig = [ IIT_ARG.Number, @@ -412,8 +412,8 @@ class LLVMQualPointerType ]); } -class LLVMAnyPointerType : LLVMAnyType { - assert isAny, "iPTRAny should have isOverloaded"; +class LLVMAnyPointerType : LLVMAnyType { + assert isAny, "pAny should have isOverloaded"; } // Match the type of another intrinsic parameter. 
Number is an index into the diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 5f6cba397c53529..1ca3aefb0b09344 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1922,7 +1922,7 @@ def imem : Operand { let PrintMethod = "printOperand"; } -def imemAny : Operand { +def imemAny : Operand { let PrintMethod = "printOperand"; } diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index 3446bfeb3e7e19f..f17c62dd1fd9d42 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -813,8 +813,8 @@ void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) const { void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out, const TypeSetByHwMode::SetType &Legal) const { - if (Out.count(MVT::iPTRAny)) { - Out.erase(MVT::iPTRAny); + if (Out.count(MVT::pAny)) { + Out.erase(MVT::pAny); Out.insert(MVT::iPTR); } else if (Out.count(MVT::iAny)) { Out.erase(MVT::iAny); diff --git a/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp index 411a98a48bfb28b..525c8d6d3e89bc9 100644 --- a/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp +++ b/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp @@ -76,7 +76,7 @@ static IndicesTy getOverloadableTypeIdxs(const Record &record, case llvm::MVT::iAny: case llvm::MVT::fAny: case llvm::MVT::Any: - case llvm::MVT::iPTRAny: + case llvm::MVT::pAny: case llvm::MVT::vAny: overloadedOps.set(r.index()); break; From 0d94c7b5ceb84b33b50c8e7b1fa66e9996a29373 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 29 Oct 2024 20:39:17 -0700 Subject: [PATCH 386/425] [flang][cuda][NFC] Make pattern names homogenous (#114156) Dialect name is uppercase. 
Make all the patterns prefix homogenous. --- .../Optimizer/Transforms/CUFOpConversion.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index fe125db7b4061ec..f1f3a95b220df5f 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -173,7 +173,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op, return mlir::success(); } -struct CufAllocateOpConversion +struct CUFAllocateOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -216,7 +216,7 @@ struct CufAllocateOpConversion } }; -struct CufDeallocateOpConversion +struct CUFDeallocateOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -284,10 +284,10 @@ static int computeWidth(mlir::Location loc, mlir::Type type, return width; } -struct CufAllocOpConversion : public mlir::OpRewritePattern { +struct CUFAllocOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; - CufAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl, + CUFAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl, const fir::LLVMTypeConverter *typeConverter) : OpRewritePattern(context), dl{dl}, typeConverter{typeConverter} {} @@ -380,7 +380,7 @@ struct CufAllocOpConversion : public mlir::OpRewritePattern { const fir::LLVMTypeConverter *typeConverter; }; -struct CufFreeOpConversion : public mlir::OpRewritePattern { +struct CUFFreeOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; mlir::LogicalResult @@ -429,11 +429,11 @@ struct CufFreeOpConversion : public mlir::OpRewritePattern { } }; -struct CufDataTransferOpConversion +struct CUFDataTransferOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; - 
CufDataTransferOpConversion(mlir::MLIRContext *context, + CUFDataTransferOpConversion(mlir::MLIRContext *context, const mlir::SymbolTable &symtab) : OpRewritePattern(context), symtab{symtab} {} @@ -718,9 +718,9 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase { void cuf::populateCUFToFIRConversionPatterns( const fir::LLVMTypeConverter &converter, mlir::DataLayout &dl, const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) { - patterns.insert(patterns.getContext(), &dl, &converter); - patterns.insert(patterns.getContext()); - patterns.insert( + patterns.insert(patterns.getContext(), &dl, &converter); + patterns.insert(patterns.getContext()); + patterns.insert( patterns.getContext(), symtab); } From cb04d3378096b83e5e357490ff8b1c479f34c469 Mon Sep 17 00:00:00 2001 From: jeffreytan81 Date: Tue, 29 Oct 2024 20:42:54 -0700 Subject: [PATCH 387/425] Improve namespace lookup using .debug_names parent chain (#110062) ## Summary This PR is a continuation of https://github.com/llvm/llvm-project/pull/108907 by using `.debug_names` parent chain faster lookup for namespaces. ## Implementation Similar to https://github.com/llvm/llvm-project/pull/108907. This PR adds a new API: `GetNamespacesWithParents` in `DWARFIndex` base class. The API performs the same function as `GetNamespaces()` with additional filtering using parents `CompilerDeclContext`. A default implementation is given in `DWARFIndex` class which parses debug info and performs the matching. In the `DebugNameDWARFIndex` override, parents `CompilerDeclContext` is cross checked with parent chain in `.debug_names` for much faster filtering before fallback to base implementation for final filtering. 
## Performance Results For the same benchmark used in https://github.com/llvm/llvm-project/pull/108907, this PR improves: 48s => 28s --------- Co-authored-by: jeffreytan81 --- .../Plugins/SymbolFile/DWARF/DWARFIndex.cpp | 16 +++++ .../Plugins/SymbolFile/DWARF/DWARFIndex.h | 11 ++++ .../SymbolFile/DWARF/DebugNamesDWARFIndex.cpp | 64 ++++++++++++++++--- .../SymbolFile/DWARF/DebugNamesDWARFIndex.h | 4 +- .../SymbolFile/DWARF/SymbolFileDWARF.cpp | 2 +- 5 files changed, 85 insertions(+), 12 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index dee90804c525840..c18edd10b968199 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -151,3 +151,19 @@ bool DWARFIndex::ProcessTypeDIEMatchQuery( return true; return callback(die); } + +void DWARFIndex::GetNamespacesWithParents( + ConstString name, const CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback) { + GetNamespaces(name, [&](DWARFDIE die) { + return ProcessNamespaceDieMatchParents(parent_decl_ctx, die, callback); + }); +} + +bool DWARFIndex::ProcessNamespaceDieMatchParents( + const CompilerDeclContext &parent_decl_ctx, DWARFDIE die, + llvm::function_ref callback) { + if (!SymbolFileDWARF::DIEInDeclContext(parent_decl_ctx, die)) + return true; + return callback(die); +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index fea3a4fd697389f..ac1f75e91c21954 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -71,6 +71,14 @@ class DWARFIndex { virtual void GetTypesWithQuery(TypeQuery &query, llvm::function_ref callback); + /// Get namespace DIEs whose base name match \param name with \param + /// parent_decl_ctx in its decl parent chain. A base implementation + /// is provided. 
Specializations should override this if they are able to + /// provide a faster implementation. + virtual void + GetNamespacesWithParents(ConstString name, + const CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback); virtual void GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, const CompilerDeclContext &parent_decl_ctx, @@ -127,6 +135,9 @@ class DWARFIndex { bool ProcessTypeDIEMatchQuery(TypeQuery &query, DWARFDIE die, llvm::function_ref callback); + bool ProcessNamespaceDieMatchParents( + const CompilerDeclContext &parent_decl_ctx, DWARFDIE die, + llvm::function_ref callback); }; } // namespace dwarf } // namespace lldb_private::plugin diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index c809e5ff7f85353..6f2cb455ec00e15 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -368,9 +368,10 @@ void DebugNamesDWARFIndex::GetFullyQualifiedType( continue; } - if (SameParentChain(parent_names, *parent_chain) && - !ProcessEntry(entry, callback)) - return; + if (SameParentChain(parent_names, *parent_chain)) { + if (!ProcessEntry(entry, callback)) + return; + } } m_fallback.GetFullyQualifiedType(context, callback); } @@ -554,17 +555,60 @@ void DebugNamesDWARFIndex::GetTypesWithQuery( continue; } - if (WithinParentChain(parent_contexts, *parent_chain) && - !ProcessEntry(entry, [&](DWARFDIE die) { - // After .debug_names filtering still sending to base class for - // further filtering before calling the callback. - return ProcessTypeDIEMatchQuery(query, die, callback); - })) - return; + if (WithinParentChain(parent_contexts, *parent_chain)) { + if (!ProcessEntry(entry, [&](DWARFDIE die) { + // After .debug_names filtering still sending to base class for + // further filtering before calling the callback. 
+ return ProcessTypeDIEMatchQuery(query, die, callback); + })) + // If the callback returns false, we're done. + return; + } } m_fallback.GetTypesWithQuery(query, callback); } +void DebugNamesDWARFIndex::GetNamespacesWithParents( + ConstString name, const CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback) { + std::vector parent_contexts = + parent_decl_ctx.GetCompilerContext(); + llvm::SmallVector parent_named_contexts; + std::copy_if(parent_contexts.rbegin(), parent_contexts.rend(), + std::back_inserter(parent_named_contexts), + [](const CompilerContext &ctx) { return !ctx.name.IsEmpty(); }); + for (const DebugNames::Entry &entry : + m_debug_names_up->equal_range(name.GetStringRef())) { + lldb_private::dwarf::Tag entry_tag = entry.tag(); + if (entry_tag == DW_TAG_namespace || + entry_tag == DW_TAG_imported_declaration) { + std::optional> parent_chain = + getParentChain(entry); + if (!parent_chain) { + // Fallback: use the base class implementation. + if (!ProcessEntry(entry, [&](DWARFDIE die) { + return ProcessNamespaceDieMatchParents(parent_decl_ctx, die, + callback); + })) + return; + continue; + } + + if (WithinParentChain(parent_named_contexts, *parent_chain)) { + if (!ProcessEntry(entry, [&](DWARFDIE die) { + // After .debug_names filtering still sending to base class for + // further filtering before calling the callback. + return ProcessNamespaceDieMatchParents(parent_decl_ctx, die, + callback); + })) + // If the callback returns false, we're done. 
+ return; + } + } + } + m_fallback.GetNamespacesWithParents(name, parent_decl_ctx, callback); +} + void DebugNamesDWARFIndex::GetFunctions( const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, const CompilerDeclContext &parent_decl_ctx, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index 074f68a8c55963d..ab6cde12623f6ab 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -55,7 +55,9 @@ class DebugNamesDWARFIndex : public DWARFIndex { void GetTypesWithQuery(TypeQuery &query, llvm::function_ref callback) override; - + void GetNamespacesWithParents( + ConstString name, const CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback) override; void GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, const CompilerDeclContext &parent_decl_ctx, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index e5b8eee8d08c246..f23f8cc3d781d03 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2900,7 +2900,7 @@ SymbolFileDWARF::FindNamespace(ConstString name, if (!DeclContextMatchesThisSymbolFile(parent_decl_ctx)) return namespace_decl_ctx; - m_index->GetNamespaces(name, [&](DWARFDIE die) { + m_index->GetNamespacesWithParents(name, parent_decl_ctx, [&](DWARFDIE die) { if (!DIEInDeclContext(parent_decl_ctx, die, only_root_namespaces)) return true; // The containing decl contexts don't match From 3de5dbb1110887d5127e815f3ca247a9d839ee85 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 29 Oct 2024 23:43:45 -0400 Subject: [PATCH 388/425] [AMDGPU][Attributor] Check the validity of a dependent AA before using its value (#114165) Even though the Attributor framework will invalidate all its dependent AAs 
after the current iteration, a dependent AA can still use the worst state of a depending AA if it doesn't check the state of the depending AA in current iteration. --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 687a7339da379d5..6a69b9d2bfc7161 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -358,7 +358,7 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { const auto *CallerInfo = A.getAAFor( *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); - if (!CallerInfo) + if (!CallerInfo || !CallerInfo->isValidState()) return false; Change = Change | clampStateAndIndicateChange(this->getState(), @@ -449,7 +449,8 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // Check for Intrinsics and propagate attributes. const AACallEdges *AAEdges = A.getAAFor( *this, this->getIRPosition(), DepClassTy::REQUIRED); - if (!AAEdges || AAEdges->hasNonAsmUnknownCallee()) + if (!AAEdges || !AAEdges->isValidState() || + AAEdges->hasNonAsmUnknownCallee()) return indicatePessimisticFixpoint(); bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); @@ -465,7 +466,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { if (IID == Intrinsic::not_intrinsic) { const AAAMDAttributes *AAAMD = A.getAAFor( *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); - if (!AAAMD) + if (!AAAMD || !AAAMD->isValidState()) return indicatePessimisticFixpoint(); *this &= *AAAMD; continue; @@ -660,7 +661,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { const auto *PointerInfoAA = A.getAAFor( *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED); - if (!PointerInfoAA) + if (!PointerInfoAA || !PointerInfoAA->getState().isValidState()) return false; return 
PointerInfoAA->forallInterferingAccesses( @@ -717,7 +718,7 @@ struct AAAMDSizeRangeAttribute const auto *CallerInfo = A.getAAFor( *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); - if (!CallerInfo) + if (!CallerInfo || !CallerInfo->isValidState()) return false; Change |= @@ -835,7 +836,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { auto &InfoCache = static_cast(A.getInfoCache()); if (const auto *AssumedGroupSize = A.getAAFor( - *this, IRPosition::function(*F), DepClassTy::REQUIRED)) { + *this, IRPosition::function(*F), DepClassTy::REQUIRED); + AssumedGroupSize && AssumedGroupSize->isValidState()) { unsigned Min, Max; std::tie(Min, Max) = InfoCache.getWavesPerEU( @@ -864,7 +866,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); const auto *AssumedGroupSize = A.getAAFor( *this, IRPosition::function(*Func), DepClassTy::REQUIRED); - if (!CallerInfo || !AssumedGroupSize) + if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() || + !AssumedGroupSize->isValidState()) return false; unsigned Min, Max; @@ -982,7 +985,8 @@ struct AAAMDGPUNoAGPR // TODO: Handle callsite attributes const auto *CalleeInfo = A.getAAFor( *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); - return CalleeInfo && CalleeInfo->isValidState() && + CalleeInfo->getAssumed(); }; bool UsedAssumedInformation = false; From cc60c46e39b0fffadc83a905b37d98aff426ac17 Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Tue, 29 Oct 2024 21:06:43 -0700 Subject: [PATCH 389/425] specify clang --target to fix breakage on AIX (#114127) `-fprofile-sample-use` is not supported on AIX, which caused a CI failure.
--- clang/test/CodeGen/pgo-cold-function-coverage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGen/pgo-cold-function-coverage.c b/clang/test/CodeGen/pgo-cold-function-coverage.c index fd1e1e7e14cda56..3003cdc3e15e02c 100644 --- a/clang/test/CodeGen/pgo-cold-function-coverage.c +++ b/clang/test/CodeGen/pgo-cold-function-coverage.c @@ -1,7 +1,7 @@ // Test -fprofile-generate-cold-function-coverage // RUN: rm -rf %t && split-file %s %t -// RUN: %clang -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%t/pgo-cold-func.prof -S -emit-llvm -o - %t/pgo-cold-func.c | FileCheck %s +// RUN: %clang --target=x86_64 -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%t/pgo-cold-func.prof -S -emit-llvm -o - %t/pgo-cold-func.c | FileCheck %s // CHECK: @__llvm_profile_filename = {{.*}} c"/xxx/yyy/default_%m.profraw\00" From 8420dbf2b98edcaf966281912e7a2a4f7a2d6572 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Wed, 30 Oct 2024 12:22:28 +0800 Subject: [PATCH 390/425] [VPlan] Refine the constructor of VPWidenIntrinsicRecipe. nfc (#113890) Infers member MayReadFromMemory, MayWriteToMemory, and MayHaveSideEffects based on intrinsic attributes. 
--------- Co-authored-by: Florian Hahn --- llvm/lib/Transforms/Vectorize/VPlan.h | 17 +++++++++++------ .../Transforms/Vectorize/VPlanTransforms.cpp | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8d6025c89f72791..0e0c64f6df9cbae 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1688,13 +1688,18 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags { VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, - bool MayReadFromMemory, bool MayWriteToMemory, - bool MayHaveSideEffects, DebugLoc DL = {}) + DebugLoc DL = {}) : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments), - VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), - MayReadFromMemory(MayReadFromMemory), - MayWriteToMemory(MayWriteToMemory), - MayHaveSideEffects(MayHaveSideEffects) {} + VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) { + LLVMContext &Ctx = Ty->getContext(); + AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID); + MemoryEffects ME = Attrs.getMemoryEffects(); + MayReadFromMemory = !ME.onlyWritesMemory(); + MayWriteToMemory = !ME.onlyReadsMemory(); + MayHaveSideEffects = MayWriteToMemory || + !Attrs.hasFnAttr(Attribute::NoUnwind) || + !Attrs.hasFnAttr(Attribute::WillReturn); + } ~VPWidenIntrinsicRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 03c4110761ac6a9..355781f955052e9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1489,7 +1489,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { Ops.push_back(&EVL); return new VPWidenIntrinsicRecipe(Intrinsic::vp_select, Ops, TypeInfo.inferScalarType(Sel), - false, false, false); + Sel->getDebugLoc()); }) .Default([&](VPRecipeBase *R)
{ return nullptr; }); From f672cc1ee1a4315f83f08cdca7dd2ccf099ff09c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Oct 2024 21:23:31 -0700 Subject: [PATCH 391/425] [RISCV] Add OperandType for condition code arguments used by select and SFB pseudos. (#114163) --- .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 4 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 + llvm/lib/Target/RISCV/RISCVInstrInfo.td | 7 ++- llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td | 58 +++++++++---------- llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td | 2 +- 5 files changed, 42 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index e18329c3d2dd495..d3899425ff843de 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -335,7 +335,9 @@ enum OperandType : unsigned { OPERAND_FRMARG, // Operand is a 3-bit rounding mode where only RTZ is valid. OPERAND_RTZARG, - OPERAND_LAST_RISCV_IMM = OPERAND_RTZARG, + // Condition code used by select and short forward branch pseudos. + OPERAND_COND_CODE, + OPERAND_LAST_RISCV_IMM = OPERAND_COND_CODE, // Operand is either a register or uimm5, this is used by V extension pseudo // instructions to represent a value that be passed as AVL to either vsetvli // or vsetivli. 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 20e531657eb2860..0cfe4eb063485f9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2542,6 +2542,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_RTZARG: Ok = Imm == RISCVFPRndMode::RTZ; break; + case RISCVOp::OPERAND_COND_CODE: + Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID; + break; } if (!Ok) { ErrInfo = "Invalid immediate"; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 86cc638fd04ac2e..a867368235584c0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -387,6 +387,11 @@ def csr_sysreg : RISCVOp, TImmLeaf(Imm);"> { // A parameterized register class alternative to i32imm/i64imm from Target.td. def ixlenimm : Operand; +// Condition code used by select and short forward branch pseudos. +def cond_code : RISCVOp { + let OperandType = "OPERAND_COND_CODE"; +} + def ixlenimm_li : Operand { let ParserMatchClass = ImmXLenAsmOperand<"", "LI">; } @@ -1450,7 +1455,7 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc, multiclass SelectCC_GPR_rrirr { let usesCustomInserter = 1 in def _Using_CC_GPR : Pseudo<(outs valty:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, valty:$truev, valty:$falsev), [(set valty:$dst, (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td index f25dc7302608baf..16cc0e5a61f0bc3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td @@ -15,7 +15,7 @@ let Predicates = [HasShortForwardBranchOpt], isSelect = 1, // This instruction moves $truev to $dst when the condition is true. 
It will // be expanded to control flow in RISCVExpandPseudoInsts. def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$truev), [(set GPR:$dst, (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), @@ -34,7 +34,7 @@ let Predicates = [HasConditionalMoveFusion, NoShortForwardBranchOpt], // be expanded to control flow in RISCVExpandPseudoInsts. // We use GPRNoX0 because c.mv cannot encode X0. def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPRNoX0:$falsev, GPRNoX0:$truev), [(set GPRNoX0:$dst, (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), @@ -51,143 +51,143 @@ def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst), let Predicates = [HasShortForwardBranchOpt], hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, Constraints = "$dst = $falsev" in { def PseudoCCADD : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSUB : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSLL : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRL : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRA : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, 
GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCAND : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCXOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCADDI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSLLI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRLI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRAI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCANDI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, 
ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCORI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCXORI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; // RV64I instructions def PseudoCCADDW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSUBW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSLLW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRLW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRAW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCADDIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; 
def PseudoCCSLLIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRLIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRAIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; // Zbb/Zbkb instructions def PseudoCCANDN : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCORN : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCXNOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index b54baa16d9286ba..4478e2461110806 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -818,7 +818,7 @@ let Predicates = [HasVendorXCVbi, IsRV32], AddedComplexity = 2 in { let usesCustomInserter = 1 in def Select_GPR_Using_CC_Imm : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, simm5:$imm5, ixlenimm:$cc, + (ins GPR:$lhs, simm5:$imm5, cond_code:$cc, GPR:$truev, GPR:$falsev), 
[]>; From 922a0d3dfe2db7a2ef50e8cef4537fa94a7b95bb Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 30 Oct 2024 00:42:44 -0400 Subject: [PATCH 392/425] [NFC][AMDGPU][Attributor] Exit earlier if entry CC (#114177) Avoid calling TTI or other stuff unnecessarily --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 6a69b9d2bfc7161..04d3e482359adeb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -767,14 +767,17 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); + + if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) { + indicatePessimisticFixpoint(); + return; + } + auto &InfoCache = static_cast(A.getInfoCache()); unsigned MinGroupSize, MaxGroupSize; std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); intersectKnown( ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); - - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { @@ -833,6 +836,12 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); + + if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) { + indicatePessimisticFixpoint(); + return; + } + auto &InfoCache = static_cast(A.getInfoCache()); if (const auto *AssumedGroupSize = A.getAAFor( @@ -847,9 +856,6 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); intersectKnown(Range); } - - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) - indicatePessimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { From 
9a7519fdb39f21a807189e1ed06826b43db929e1 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 30 Oct 2024 00:53:43 -0400 Subject: [PATCH 393/425] Revert "[NFC][AMDGPU][Attributor] Exit earlier if entry CC (#114177)" This reverts commit 922a0d3dfe2db7a2ef50e8cef4537fa94a7b95bb. --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 04d3e482359adeb..6a69b9d2bfc7161 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -767,17 +767,14 @@ struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute { void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); - - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) { - indicatePessimisticFixpoint(); - return; - } - auto &InfoCache = static_cast(A.getInfoCache()); unsigned MinGroupSize, MaxGroupSize; std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F); intersectKnown( ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1))); + + if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) + indicatePessimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { @@ -836,12 +833,6 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { void initialize(Attributor &A) override { Function *F = getAssociatedFunction(); - - if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) { - indicatePessimisticFixpoint(); - return; - } - auto &InfoCache = static_cast(A.getInfoCache()); if (const auto *AssumedGroupSize = A.getAAFor( @@ -856,6 +847,9 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { ConstantRange Range(APInt(32, Min), APInt(32, Max + 1)); intersectKnown(Range); } + + if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) + indicatePessimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { From 
bb3915149a7c9b1660db9caebfc96343352e8454 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Tue, 29 Oct 2024 22:10:33 -0700 Subject: [PATCH 394/425] [MemProf] Support for random hotness when writing profile (#113998) Add support for generating random hotness in the memprof profile writer, to be used for testing. The random seed is printed to stderr, and an additional option enables providing a specific seed in order to reproduce a particular random profile. --- .../llvm/ProfileData/InstrProfWriter.h | 10 ++++- llvm/include/llvm/ProfileData/MemProf.h | 9 ++++ llvm/lib/ProfileData/InstrProfWriter.cpp | 42 +++++++++++++++++-- llvm/test/Transforms/PGOProfile/memprof.ll | 19 +++++++++ llvm/tools/llvm-profdata/llvm-profdata.cpp | 12 +++++- 5 files changed, 86 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index b8b6c684717b058..559549b0a22cc97 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -78,12 +78,20 @@ class InstrProfWriter { // Whether to serialize the full schema. bool MemProfFullSchema; + // Whether to generate random memprof hotness for testing. + bool MemprofGenerateRandomHotness; + public: + // For memprof testing, random hotness can be assigned to the contexts if + // MemprofGenerateRandomHotness is enabled. The random seed can be either + // provided by MemprofGenerateRandomHotnessSeed, or if that is 0, one will be + // generated in the writer using the current time.
InstrProfWriter( bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0, uint64_t MaxTemporalProfTraceLength = 0, bool WritePrevVersion = false, memprof::IndexedVersion MemProfVersionRequested = memprof::Version0, - bool MemProfFullSchema = false); + bool MemProfFullSchema = false, bool MemprofGenerateRandomHotness = false, + unsigned MemprofGenerateRandomHotnessSeed = 0); ~InstrProfWriter(); StringMap &getProfileData() { return FunctionData; } diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index f8121d35732518b..da2cc807370095d 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -147,6 +147,15 @@ struct PortableMemInfoBlock { return Name; \ } #include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + + // Define setters for each type which can be called by the writer. +#define MIBEntryDef(NameTag, Name, Type) \ + void set##Name(Type NewVal) { \ + assert(Schema[llvm::to_underlying(Meta::Name)]); \ + Name = NewVal; \ + } +#include "llvm/ProfileData/MIBEntryDef.inc" #undef MIBEntryDef void clear() { *this = PortableMemInfoBlock(); } diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 1a3721bf1035033..f09241681b92a5a 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -184,13 +185,25 @@ class InstrProfRecordWriterTrait { InstrProfWriter::InstrProfWriter( bool Sparse, uint64_t TemporalProfTraceReservoirSize, uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion, - memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema) + memprof::IndexedVersion 
MemProfVersionRequested, bool MemProfFullSchema, + bool MemprofGenerateRandomHotness, + unsigned MemprofGenerateRandomHotnessSeed) : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength), TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize), InfoObj(new InstrProfRecordWriterTrait()), WritePrevVersion(WritePrevVersion), MemProfVersionRequested(MemProfVersionRequested), - MemProfFullSchema(MemProfFullSchema) {} + MemProfFullSchema(MemProfFullSchema), + MemprofGenerateRandomHotness(MemprofGenerateRandomHotness) { + // Set up the random number seed if requested. + if (MemprofGenerateRandomHotness) { + unsigned seed = MemprofGenerateRandomHotnessSeed + ? MemprofGenerateRandomHotnessSeed + : std::time(nullptr); + errs() << "random hotness seed = " << seed << "\n"; + std::srand(seed); + } +} InstrProfWriter::~InstrProfWriter() { delete InfoObj; } @@ -273,13 +286,34 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, void InstrProfWriter::addMemProfRecord( const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) { - auto [Iter, Inserted] = MemProfData.Records.insert({Id, Record}); + auto NewRecord = Record; + // Provoke random hotness values if requested. We specify the lifetime access + // density and lifetime length that will result in a cold or not cold hotness. + // See the logic in getAllocType() in Analysis/MemoryProfileInfo.cpp. + if (MemprofGenerateRandomHotness) { + for (auto &Alloc : NewRecord.AllocSites) { + // To get a not cold context, set the lifetime access density to the + // maximum value and the lifetime to 0. + uint64_t NewTLAD = std::numeric_limits::max(); + uint64_t NewTL = 0; + bool IsCold = std::rand() % 2; + if (IsCold) { + // To get a cold context, set the lifetime access density to 0 and the + // lifetime to the maximum value. 
+ NewTLAD = 0; + NewTL = std::numeric_limits::max(); + } + Alloc.Info.setTotalLifetimeAccessDensity(NewTLAD); + Alloc.Info.setTotalLifetime(NewTL); + } + } + auto [Iter, Inserted] = MemProfData.Records.insert({Id, NewRecord}); // If we inserted a new record then we are done. if (Inserted) { return; } memprof::IndexedMemProfRecord &Existing = Iter->second; - Existing.merge(Record); + Existing.merge(NewRecord); } bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id, diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index e1457ca7251ed88..205eeb8878989d2 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -66,6 +66,18 @@ ;; Check that the total sizes are reported if requested. ; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZES +;; Make sure we emit a random hotness seed if requested. +; RUN: llvm-profdata merge -memprof-random-hotness %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand 2>&1 | FileCheck %s --check-prefix=RAND +; RAND: random hotness seed = +;; Can't check the exact values, but make sure applying the random profile +;; succeeds with the same stats +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=ALL,MEMPROFONLY,MEMPROFSTATS + +;; Make sure we use a specific random hotness seed if requested. 
+; RUN: llvm-profdata merge -memprof-random-hotness -memprof-random-hotness-seed=1730170724 %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand2 2>&1 | FileCheck %s --check-prefix=RAND2 +; RAND2: random hotness seed = 1730170724 +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2,ALL,MEMPROFONLY,MEMPROFSTATS + ; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched ; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched ; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched @@ -372,6 +384,13 @@ for.end: ; preds = %for.cond ; MEMPROFNOCOLINFO: ![[C10]] = !{i64 -4535090212904553409} ; MEMPROFNOCOLINFO: ![[C11]] = !{i64 3577763375057267810} +;; For the specific random seed, this is the expected order of hotness +; MEMPROFRAND2: !"cold" +; MEMPROFRAND2: !"cold" +; MEMPROFRAND2: !"cold" +; MEMPROFRAND2: !"hot" +; MEMPROFRAND2: !"hot" + ; MEMPROFSTATS: 8 memprof - Number of alloc contexts in memory profile. ; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile. ; MEMPROFSTATS: 6 memprof - Number of functions having valid memory profile. 
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 59f0f1f1fae8992..f7023aa966adf6f 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -342,6 +342,15 @@ cl::opt MemProfFullSchema( "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand), cl::desc("Use the full schema for serialization"), cl::init(false)); +static cl::opt + MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(false), + cl::Hidden, cl::sub(MergeSubcommand), + cl::desc("Generate random hotness values")); +static cl::opt MemprofGenerateRandomHotnessSeed( + "memprof-random-hotness-seed", cl::init(0), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("Random hotness seed to use (0 to generate new seed)")); + // Options specific to overlap subcommand. cl::opt BaseFilename(cl::Positional, cl::Required, cl::desc(""), @@ -641,7 +650,8 @@ struct WriterContext { SmallSet &WriterErrorCodes, uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0) : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion, - MemProfVersionRequested, MemProfFullSchema), + MemProfVersionRequested, MemProfFullSchema, + MemprofGenerateRandomHotness, MemprofGenerateRandomHotnessSeed), ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {} }; From 6d9fc1b84619ca22f3e70d581c87940bcfbf3a93 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 29 Oct 2024 22:14:24 -0700 Subject: [PATCH 395/425] AMDGPU: Fix producing invalid IR on vector typed getelementptr (#114113) This did not consider the IR change to allow a scalar base with a vector offset part. Reject any users that are not explicitly handled. In this situation we could handle the vector GEP, but that is a larger change. This just avoids the IR verifier error by rejecting it. 
--- .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 18 ++++++-- .../promote-alloca-invalid-vector-gep.ll | 44 +++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index f8744d6a483cffe..7dd7388376f4743 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -1159,7 +1159,6 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( if (LoadInst *LI = dyn_cast(UseInst)) { if (LI->isVolatile()) return false; - continue; } @@ -1170,12 +1169,19 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( // Reject if the stored value is not the pointer operand. if (SI->getPointerOperand() != Val) return false; - } else if (AtomicRMWInst *RMW = dyn_cast(UseInst)) { + continue; + } + + if (AtomicRMWInst *RMW = dyn_cast(UseInst)) { if (RMW->isVolatile()) return false; - } else if (AtomicCmpXchgInst *CAS = dyn_cast(UseInst)) { + continue; + } + + if (AtomicCmpXchgInst *CAS = dyn_cast(UseInst)) { if (CAS->isVolatile()) return false; + continue; } // Only promote a select if we know that the other select operand @@ -1186,6 +1192,7 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( // May need to rewrite constant operands. WorkList.push_back(ICmp); + continue; } // TODO: If we know the address is only observed through flat pointers, we @@ -1198,8 +1205,9 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( if (isa(User) || isa(User)) return false; + // TODO: Handle vectors of pointers. 
if (!User->getType()->isPointerTy()) - continue; + return false; if (GetElementPtrInst *GEP = dyn_cast(UseInst)) { // Be conservative if an address could be computed outside the bounds of @@ -1504,6 +1512,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I, PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS); + assert(isa(V->getType())); + // FIXME: It doesn't really make sense to try to do this for all // instructions. V->mutateType(NewTy); diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll new file mode 100644 index 000000000000000..b0d578e421e280c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s + +; Check that invalid IR is not produced on a vector typed +; getelementptr with a scalar alloca pointer base. 
+ +define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() { +; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[GETELEMENTPTR]], i64 0 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4 +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, align 4, addrspace(5) + %getelementptr = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> + %extractelement = extractelement <4 x ptr addrspace(5)> %getelementptr, i64 0 + store i32 0, ptr addrspace(5) %extractelement + ret void +} + +define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) { +; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> +; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(5)> [[GETELEMENTPTR0]], <4 x ptr addrspace(5)> [[GETELEMENTPTR1]] +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[SELECT]], i64 1 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4 +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, align 4, addrspace(5) + %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> + %getelementptr1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> + %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> 
%getelementptr1 + %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1 + store i32 0, ptr addrspace(5) %extractelement + ret void +} From 62ff85f0799560b42754ef77b5f64ca2c7feeff7 Mon Sep 17 00:00:00 2001 From: Santhosh Kumar Ellendula Date: Wed, 30 Oct 2024 10:50:59 +0530 Subject: [PATCH 396/425] [lldb-dap] Fix for missing 'raw_string_ostream::flush' removal in ProgressEvent.cpp; addressing #108745 (#114087) I hope it was missed unintentionally, pushing the same for the review. Ref: https://github.com/llvm/llvm-project/pull/108745 --------- Co-authored-by: Santhosh Kumar Ellendula Co-authored-by: Santhosh Kumar Ellendula --- lldb/tools/lldb-dap/ProgressEvent.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/tools/lldb-dap/ProgressEvent.cpp b/lldb/tools/lldb-dap/ProgressEvent.cpp index 8a660b50af1205b..0dcc2ee81001d50 100644 --- a/lldb/tools/lldb-dap/ProgressEvent.cpp +++ b/lldb/tools/lldb-dap/ProgressEvent.cpp @@ -110,7 +110,6 @@ json::Value ProgressEvent::ToJSON() const { std::string progress_id_str; llvm::raw_string_ostream progress_id_strm(progress_id_str); progress_id_strm << m_progress_id; - progress_id_strm.flush(); body.try_emplace("progressId", progress_id_str); if (m_event_type == progressStart) { From e7262c15d3a2aef7cf4065e654181ab86eed24cc Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 29 Oct 2024 22:34:47 -0700 Subject: [PATCH 397/425] [RISCV] Add OperandType for sew and vecpolicy operands. 
(#114168) --- .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 6 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 6 + .../Target/RISCV/RISCVInstrInfoVPseudos.td | 146 +++++++++--------- llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 12 +- llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 4 +- 5 files changed, 96 insertions(+), 78 deletions(-) diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index d3899425ff843de..b3a6cd40ea039b0 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -337,7 +337,11 @@ enum OperandType : unsigned { OPERAND_RTZARG, // Condition code used by select and short forward branch pseudos. OPERAND_COND_CODE, - OPERAND_LAST_RISCV_IMM = OPERAND_COND_CODE, + // Vector policy operand. + OPERAND_VEC_POLICY, + // Vector SEW operand. + OPERAND_SEW, + OPERAND_LAST_RISCV_IMM = OPERAND_SEW, // Operand is either a register or uimm5, this is used by V extension pseudo // instructions to represent a value that be passed as AVL to either vsetvli // or vsetivli. 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 0cfe4eb063485f9..d5b086861d71e61 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2545,6 +2545,12 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_COND_CODE: Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID; break; + case RISCVOp::OPERAND_VEC_POLICY: + Ok = (Imm & (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) == Imm; + break; + case RISCVOp::OPERAND_SEW: + Ok = Imm == 0 || (Imm >= 3 && Imm <= 6); + break; } if (!Ok) { ErrInfo = "Invalid immediate"; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index af4f653f57afd5a..6ffdae1d7df2aed 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -84,6 +84,14 @@ def AVL : RegisterOperand { let OperandType = "OPERAND_AVL"; } +def vec_policy : RISCVOp { + let OperandType = "OPERAND_VEC_POLICY"; +} + +def sew : RISCVOp { + let OperandType = "OPERAND_SEW"; +} + // X0 has special meaning for vsetvl/vsetvli. 
// rd | rs1 | AVL value | Effect on vl //-------------------------------------------------------------- @@ -764,8 +772,8 @@ class GetVTypePredicates { class VPseudoUSLoadNoMask : Pseudo<(outs RetClass:$rd), - (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -782,7 +790,7 @@ class VPseudoUSLoadMask.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -799,7 +807,7 @@ class VPseudoUSLoadFFNoMask : Pseudo<(outs RetClass:$rd, GPR:$vl), (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -816,7 +824,7 @@ class VPseudoUSLoadFFMask.R:$rd, GPR:$vl), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$avl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -833,7 +841,7 @@ class VPseudoSLoadNoMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -850,7 +858,7 @@ class VPseudoSLoadMask.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, GPR:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -872,7 +880,7 @@ class VPseudoILoadNoMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLX { let mayLoad = 1; @@ -895,7 +903,7 @@ 
class VPseudoILoadMask.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, IdxClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLX { let mayLoad = 1; @@ -912,7 +920,7 @@ class VPseudoILoadMask : Pseudo<(outs), - (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>, + (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSE { let mayLoad = 0; @@ -926,7 +934,7 @@ class VPseudoUSStoreMask : Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSE { let mayLoad = 0; @@ -940,7 +948,7 @@ class VPseudoSStoreNoMask : Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSE { let mayLoad = 0; @@ -954,7 +962,7 @@ class VPseudoSStoreMask : Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSE { let mayLoad = 0; @@ -967,7 +975,7 @@ class VPseudoSStoreMask : Pseudo<(outs RegClass:$rd), (ins RegClass:$passthru, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -981,7 +989,7 @@ class VPseudoNullaryNoMask : class VPseudoNullaryMask : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -996,7 +1004,7 @@ class VPseudoNullaryMask : // Nullary for pseudo instructions. They are expanded in // RISCVExpandPseudoInsts pass. 
class VPseudoNullaryPseudoM : - Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>, + Pseudo<(outs VR:$rd), (ins AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1016,7 +1024,7 @@ class VPseudoUnaryNoMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1033,7 +1041,7 @@ class VPseudoUnaryNoMaskNoPolicy : Pseudo<(outs RetClass:$rd), - (ins OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>, + (ins OpClass:$rs2, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1050,7 +1058,7 @@ class VPseudoUnaryNoMaskRoundingMode : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1070,7 +1078,7 @@ class VPseudoUnaryMask : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1090,7 +1098,7 @@ class VPseudoUnaryMaskRoundingMode.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1110,7 +1118,7 @@ class VPseudoUnaryMask_NoExcept : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> { + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1128,7 +1136,7 @@ class VPseudoUnaryNoMask_FRM : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, 
ixlenimm:$frm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1148,7 +1156,7 @@ class VPseudoUnaryMask_FRM.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, VMaskOp:$vm, ixlenimm:$frm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1164,7 +1172,7 @@ class VPseudoUnaryMask_FRM, + (ins VR:$rs2, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1175,7 +1183,7 @@ class VPseudoUnaryNoMaskGPROut : class VPseudoUnaryMaskGPROut : Pseudo<(outs GPR:$rd), - (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1189,7 +1197,7 @@ class VPseudoUnaryAnyMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, - VR:$vm, AVL:$vl, ixlenimm:$sew), []>, + VR:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1205,7 +1213,7 @@ class VPseudoBinaryNoMask : Pseudo<(outs RetClass:$rd), - (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1223,7 +1231,7 @@ class VPseudoBinaryNoMaskPolicy : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1243,7 +1251,7 @@ class VPseudoBinaryNoMaskRoundingMode : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1266,7 +1274,7 @@ class 
VPseudoBinaryMaskPolicyRoundingMode.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, VMaskOp:$vm, ixlenimm:$rm, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1288,8 +1296,8 @@ class VPseudoTiedBinaryNoMask : Pseudo<(outs RetClass:$rd), - (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1310,8 +1318,8 @@ class VPseudoTiedBinaryNoMaskRoundingMode, + AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1331,7 +1339,7 @@ class VPseudoIStoreNoMask LMUL, bit Ordered>: Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, - ixlenimm:$sew),[]>, + sew:$sew),[]>, RISCVVPseudo, RISCVVSX { let mayLoad = 0; @@ -1345,7 +1353,7 @@ class VPseudoIStoreMask LMUL, bit Ordered>: Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + VMaskOp:$vm, AVL:$vl, sew:$sew),[]>, RISCVVPseudo, RISCVVSX { let mayLoad = 0; @@ -1363,7 +1371,7 @@ class VPseudoBinaryMaskPolicy.R:$rd), (ins GetVRegNoV0.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1382,7 +1390,7 @@ class VPseudoTernaryMaskPolicy.R:$rd), (ins GetVRegNoV0.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1401,7 +1409,7 @@ class VPseudoTernaryMaskPolicyRoundingMode, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1423,7 +1431,7 @@ class VPseudoBinaryMOutMask, + VMaskOp:$vm, 
AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1445,7 +1453,7 @@ class VPseudoTiedBinaryMask.R:$rd), (ins GetVRegNoV0.R:$passthru, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1468,7 +1476,7 @@ class VPseudoTiedBinaryMaskRoundingMode, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1494,9 +1502,9 @@ class VPseudoBinaryCarry, + AVL:$vl, sew:$sew)), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1515,7 +1523,7 @@ class VPseudoTiedBinaryCarryIn : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMV0:$carry, AVL:$vl, ixlenimm:$sew), []>, + VMV0:$carry, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1534,7 +1542,7 @@ class VPseudoTernaryNoMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1551,7 +1559,7 @@ class VPseudoTernaryNoMaskWithPolicy : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1570,7 +1578,7 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + ixlenimm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1589,7 +1597,7 @@ class VPseudoUSSegLoadNoMask NF> : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let 
mayLoad = 1; @@ -1606,7 +1614,7 @@ class VPseudoUSSegLoadMask NF> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1624,7 +1632,7 @@ class VPseudoUSSegLoadFFNoMask NF> : Pseudo<(outs RetClass:$rd, GPR:$vl), (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1641,7 +1649,7 @@ class VPseudoUSSegLoadFFMask NF> : Pseudo<(outs GetVRegNoV0.R:$rd, GPR:$vl), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$avl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1659,7 +1667,7 @@ class VPseudoSSegLoadNoMask NF> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, GPRMem:$rs1, GPR:$offset, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1676,8 +1684,8 @@ class VPseudoSSegLoadMask NF> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + GPR:$offset, VMaskOp:$vm, AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1698,7 +1706,7 @@ class VPseudoISegLoadNoMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, GPRMem:$rs1, IdxClass:$offset, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLXSEG { let mayLoad = 1; @@ -1720,8 +1728,8 @@ class VPseudoISegLoadMask : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + IdxClass:$offset, VMaskOp:$vm, AVL:$vl, 
sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo, RISCVVLXSEG { let mayLoad = 1; @@ -1740,7 +1748,7 @@ class VPseudoUSSegStoreNoMask NF> : Pseudo<(outs), - (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>, + (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSSEG { let mayLoad = 0; @@ -1755,7 +1763,7 @@ class VPseudoUSSegStoreMask NF> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSSEG { let mayLoad = 0; @@ -1770,7 +1778,7 @@ class VPseudoSSegStoreNoMask NF> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, GPR:$offset, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSSEG { let mayLoad = 0; @@ -1785,7 +1793,7 @@ class VPseudoSSegStoreMask NF> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, GPR: $offset, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSSEG { let mayLoad = 0; @@ -1803,7 +1811,7 @@ class VPseudoISegStoreNoMask : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSXSEG { let mayLoad = 0; @@ -1821,7 +1829,7 @@ class VPseudoISegStoreMask : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSXSEG { let mayLoad = 0; @@ -6762,13 +6770,13 @@ let Predicates = [HasVInstructions] in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { let HasSEWOp = 1, BaseInstr = VMV_X_S in def PseudoVMV_X_S: - Pseudo<(outs GPR:$rd), (ins VR:$rs2, ixlenimm:$sew), []>, + Pseudo<(outs GPR:$rd), (ins VR:$rs2, sew:$sew), []>, Sched<[WriteVMovXS, ReadVMovXS]>, RISCVVPseudo; let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, isReMaterializable = 1, Constraints = "$rd = $rs1" in def PseudoVMV_S_X: Pseudo<(outs VR:$rd), - (ins VR:$rs1, 
GPR:$rs2, AVL:$vl, ixlenimm:$sew), + (ins VR:$rs1, GPR:$rs2, AVL:$vl, sew:$sew), []>, Sched<[WriteVMovSX, ReadVMovSX_V, ReadVMovSX_X]>, RISCVVPseudo; @@ -6785,14 +6793,14 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { let HasSEWOp = 1, BaseInstr = VFMV_F_S in def "PseudoVFMV_" # f.FX # "_S" : Pseudo<(outs f.fprclass:$rd), - (ins VR:$rs2, ixlenimm:$sew), []>, + (ins VR:$rs2, sew:$sew), []>, Sched<[WriteVMovFS, ReadVMovFS]>, RISCVVPseudo; let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1, Constraints = "$rd = $rs1" in def "PseudoVFMV_S_" # f.FX : Pseudo<(outs VR:$rd), - (ins VR:$rs1, f.fprclass:$rs2, AVL:$vl, ixlenimm:$sew), + (ins VR:$rs1, f.fprclass:$rs2, AVL:$vl, sew:$sew), []>, Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>, RISCVVPseudo; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 5068d0be0fb49bd..81467ada004487e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -230,7 +230,7 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf class VPseudoVC_X : Pseudo<(outs), (ins OpClass:$op1, payload5:$rs2, payload5:$rd, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -243,7 +243,7 @@ class VPseudoVC_X : class VPseudoVC_XV : Pseudo<(outs), (ins OpClass:$op1, payload5:$rd, RS2Class:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -257,7 +257,7 @@ class VPseudoVC_XVV : Pseudo<(outs), (ins OpClass:$op1, RDClass:$rd, RS2Class:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -270,7 +270,7 @@ class VPseudoVC_XVV : Pseudo<(outs RDClass:$rd), (ins OpClass:$op1, payload5:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, 
RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -284,7 +284,7 @@ class VPseudoVC_V_XV : Pseudo<(outs RDClass:$rd), (ins OpClass:$op1, RS2Class:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -298,7 +298,7 @@ class VPseudoVC_V_XVV : Pseudo<(outs RDClass:$rd), (ins OpClass:$op1, RDClass:$rs3, RS2Class:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 7ec13e4eaafa7d3..782651fd6d01975 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -231,7 +231,7 @@ class ZvkMxSet { class VPseudoBinaryNoMask_Zvk : Pseudo<(outs RetClass:$rd_wb), - (ins RetClass:$rd, OpClass:$rs2, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + (ins RetClass:$rd, OpClass:$rs2, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -248,7 +248,7 @@ class VPseudoTernaryNoMask_Zvk : Pseudo<(outs RetClass:$rd_wb), (ins RetClass:$rd, Op1Class:$rs2, Op2Class:$rs1, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; From f1467b3f73e2849fd8349ff215cf01987fa51a9d Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 30 Oct 2024 13:59:56 +0800 Subject: [PATCH 398/425] [SDAG][NFC] Convert `SDNodeFlags` into an enumeration (#114167) This patch converts `SDNodeFlags` into an enumeration as we did for `FastMathFlags`. It simplifies the implementation and improves compile-time. This patch is NFC since it doesn't break SDNodeFlags API. 
--- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 159 ++++++++---------- 1 file changed, 73 insertions(+), 86 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index bda0120a2df4aa2..26488413fe5826e 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -378,36 +378,48 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: - bool NoUnsignedWrap : 1; - bool NoSignedWrap : 1; - bool Exact : 1; - bool Disjoint : 1; - bool NonNeg : 1; - bool NoNaNs : 1; - bool NoInfs : 1; - bool NoSignedZeros : 1; - bool AllowReciprocal : 1; - bool AllowContract : 1; - bool ApproximateFuncs : 1; - bool AllowReassociation : 1; - - // We assume instructions do not raise floating-point exceptions by default, - // and only those marked explicitly may do so. We could choose to represent - // this via a positive "FPExcept" flags like on the MI level, but having a - // negative "NoFPExcept" flag here makes the flag intersection logic more - // straightforward. - bool NoFPExcept : 1; - // Instructions with attached 'unpredictable' metadata on IR level. - bool Unpredictable : 1; + friend class SDNode; + + unsigned Flags = 0; + + template void setFlag(bool B) { + Flags = (Flags & ~Flag) | (B ? Flag : 0); + } public: + enum : unsigned { + None = 0, + NoUnsignedWrap = 1 << 0, + NoSignedWrap = 1 << 1, + Exact = 1 << 2, + Disjoint = 1 << 3, + NonNeg = 1 << 4, + NoNaNs = 1 << 5, + NoInfs = 1 << 6, + NoSignedZeros = 1 << 7, + AllowReciprocal = 1 << 8, + AllowContract = 1 << 9, + ApproximateFuncs = 1 << 10, + AllowReassociation = 1 << 11, + + // We assume instructions do not raise floating-point exceptions by default, + // and only those marked explicitly may do so. 
We could choose to represent + // this via a positive "FPExcept" flags like on the MI level, but having a + // negative "NoFPExcept" flag here makes the flag intersection logic more + // straightforward. + NoFPExcept = 1 << 12, + // Instructions with attached 'unpredictable' metadata on IR level. + Unpredictable = 1 << 13, + + // NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below + // the class definition when adding new flags. + + PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint | + NonNeg | NoNaNs | NoInfs, + }; + /// Default constructor turns off all optimization flags. - SDNodeFlags() - : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), - Disjoint(false), NonNeg(false), NoNaNs(false), NoInfs(false), - NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), - ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false), - Unpredictable(false) {} + SDNodeFlags() : Flags(0) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -421,71 +433,49 @@ struct SDNodeFlags { } // These are mutators for each flag. 
- void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } - void setNoSignedWrap(bool b) { NoSignedWrap = b; } - void setExact(bool b) { Exact = b; } - void setDisjoint(bool b) { Disjoint = b; } - void setNonNeg(bool b) { NonNeg = b; } - void setNoNaNs(bool b) { NoNaNs = b; } - void setNoInfs(bool b) { NoInfs = b; } - void setNoSignedZeros(bool b) { NoSignedZeros = b; } - void setAllowReciprocal(bool b) { AllowReciprocal = b; } - void setAllowContract(bool b) { AllowContract = b; } - void setApproximateFuncs(bool b) { ApproximateFuncs = b; } - void setAllowReassociation(bool b) { AllowReassociation = b; } - void setNoFPExcept(bool b) { NoFPExcept = b; } - void setUnpredictable(bool b) { Unpredictable = b; } + void setNoUnsignedWrap(bool b) { setFlag(b); } + void setNoSignedWrap(bool b) { setFlag(b); } + void setExact(bool b) { setFlag(b); } + void setDisjoint(bool b) { setFlag(b); } + void setNonNeg(bool b) { setFlag(b); } + void setNoNaNs(bool b) { setFlag(b); } + void setNoInfs(bool b) { setFlag(b); } + void setNoSignedZeros(bool b) { setFlag(b); } + void setAllowReciprocal(bool b) { setFlag(b); } + void setAllowContract(bool b) { setFlag(b); } + void setApproximateFuncs(bool b) { setFlag(b); } + void setAllowReassociation(bool b) { setFlag(b); } + void setNoFPExcept(bool b) { setFlag(b); } + void setUnpredictable(bool b) { setFlag(b); } // These are accessors for each flag. 
- bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } - bool hasNoSignedWrap() const { return NoSignedWrap; } - bool hasExact() const { return Exact; } - bool hasDisjoint() const { return Disjoint; } - bool hasNonNeg() const { return NonNeg; } - bool hasNoNaNs() const { return NoNaNs; } - bool hasNoInfs() const { return NoInfs; } - bool hasNoSignedZeros() const { return NoSignedZeros; } - bool hasAllowReciprocal() const { return AllowReciprocal; } - bool hasAllowContract() const { return AllowContract; } - bool hasApproximateFuncs() const { return ApproximateFuncs; } - bool hasAllowReassociation() const { return AllowReassociation; } - bool hasNoFPExcept() const { return NoFPExcept; } - bool hasUnpredictable() const { return Unpredictable; } + bool hasNoUnsignedWrap() const { return Flags & NoUnsignedWrap; } + bool hasNoSignedWrap() const { return Flags & NoSignedWrap; } + bool hasExact() const { return Flags & Exact; } + bool hasDisjoint() const { return Flags & Disjoint; } + bool hasNonNeg() const { return Flags & NonNeg; } + bool hasNoNaNs() const { return Flags & NoNaNs; } + bool hasNoInfs() const { return Flags & NoInfs; } + bool hasNoSignedZeros() const { return Flags & NoSignedZeros; } + bool hasAllowReciprocal() const { return Flags & AllowReciprocal; } + bool hasAllowContract() const { return Flags & AllowContract; } + bool hasApproximateFuncs() const { return Flags & ApproximateFuncs; } + bool hasAllowReassociation() const { return Flags & AllowReassociation; } + bool hasNoFPExcept() const { return Flags & NoFPExcept; } + bool hasUnpredictable() const { return Flags & Unpredictable; } bool operator==(const SDNodeFlags &Other) const { - return NoUnsignedWrap == Other.NoUnsignedWrap && - NoSignedWrap == Other.NoSignedWrap && Exact == Other.Exact && - Disjoint == Other.Disjoint && NonNeg == Other.NonNeg && - NoNaNs == Other.NoNaNs && NoInfs == Other.NoInfs && - NoSignedZeros == Other.NoSignedZeros && - AllowReciprocal == Other.AllowReciprocal && - 
AllowContract == Other.AllowContract && - ApproximateFuncs == Other.ApproximateFuncs && - AllowReassociation == Other.AllowReassociation && - NoFPExcept == Other.NoFPExcept && - Unpredictable == Other.Unpredictable; + return Flags == Other.Flags; } /// Clear any flags in this flag set that aren't also set in Flags. All /// flags will be cleared if Flags are undefined. - void intersectWith(const SDNodeFlags Flags) { - NoUnsignedWrap &= Flags.NoUnsignedWrap; - NoSignedWrap &= Flags.NoSignedWrap; - Exact &= Flags.Exact; - Disjoint &= Flags.Disjoint; - NonNeg &= Flags.NonNeg; - NoNaNs &= Flags.NoNaNs; - NoInfs &= Flags.NoInfs; - NoSignedZeros &= Flags.NoSignedZeros; - AllowReciprocal &= Flags.AllowReciprocal; - AllowContract &= Flags.AllowContract; - ApproximateFuncs &= Flags.ApproximateFuncs; - AllowReassociation &= Flags.AllowReassociation; - NoFPExcept &= Flags.NoFPExcept; - Unpredictable &= Flags.Unpredictable; - } + void intersectWith(const SDNodeFlags Flags) { this->Flags &= Flags.Flags; } }; +LLVM_DECLARE_ENUM_AS_BITMASK(decltype(SDNodeFlags::None), + SDNodeFlags::Unpredictable); + /// Represents one node in the SelectionDAG. /// class SDNode : public FoldingSetNode, public ilist_node { @@ -1029,10 +1019,7 @@ END_TWO_BYTE_PACK() void intersectFlagsWith(const SDNodeFlags Flags); bool hasPoisonGeneratingFlags() const { - SDNodeFlags Flags = getFlags(); - return Flags.hasNoUnsignedWrap() || Flags.hasNoSignedWrap() || - Flags.hasExact() || Flags.hasDisjoint() || Flags.hasNonNeg() || - Flags.hasNoNaNs() || Flags.hasNoInfs(); + return Flags.Flags & SDNodeFlags::PoisonGeneratingFlags; } void setCFIType(uint32_t Type) { CFIType = Type; } From df0d249b6511289f1e8c1389f4fd33d7b4c083fa Mon Sep 17 00:00:00 2001 From: donald chen Date: Wed, 30 Oct 2024 14:01:49 +0800 Subject: [PATCH 399/425] [mlir] [linalg] fix side effect of linalg op (#114045) Linalg op need to take into account memory side effects happening inside the region when determining their own side effects. 
This patch fixed issue https://github.com/llvm/llvm-project/issues/112881 --- .../Dialect/Linalg/IR/LinalgStructuredOps.td | 1 + mlir/test/Dialect/Linalg/canonicalize.mlir | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index bfc609bd708164a..c2fee8ea55c960a 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -30,6 +30,7 @@ class LinalgStructuredBase_Op props> SingleBlockImplicitTerminator<"YieldOp">, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, + RecursiveMemoryEffects, DestinationStyleOpInterface, LinalgStructuredInterface, ReifyRankedShapedTypeOpInterface], props)> { diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 4bc2ed140da91a6..5de007b390c51d6 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -1232,3 +1232,20 @@ func.func @transpose_buffer(%input: memref, // CHECK-SAME: %[[VAL_1:.*]]: memref) { // CHECK: linalg.transpose ins(%[[VAL_0]] : memref) // CHECK-SAME: outs(%[[VAL_1]] : memref) permutation = [0] + +// ----- + +// This test checks linalg op has a recursive memory effect. Otherwise +// linalg.map without a user would be DCEd. 
+func.func @recursive_effect(%arg : tensor<1xf32>) { + %init = arith.constant dense<0.0> : tensor<1xf32> + %mapped = linalg.map ins(%arg:tensor<1xf32>) outs(%init :tensor<1xf32>) + (%in : f32) { + vector.print %in : f32 + linalg.yield %in : f32 + } + func.return +} + +// CHECK-LABEL: @recursive_effect +// CHECK: linalg.map From b47e2316bf083cd2e0e5ac2ef1e9c913f839a51b Mon Sep 17 00:00:00 2001 From: Ryosuke Niwa Date: Tue, 29 Oct 2024 23:13:23 -0700 Subject: [PATCH 400/425] [alpha.webkit.UncountedLocalVarsChecker] Warn the use of a raw pointer/reference when the guardian variable gets mutated. (#113859) This checker has a notion of a guardian variable which is a variable that keeps the object pointed to by a raw pointer / reference in an inner scope alive long enough to "guard" it from use-after-free. But such a guardian variable fails to keep the object alive if it ever gets mutated within the scope of a raw pointer / reference. This PR fixes this bug by introducing a new AST visitor class, GuardianVisitor, which traverses the compound statements of a guarded variable (raw pointer / reference) and looks for any operator=, move constructor, or calls to "swap", "leakRef", or "releaseNonNull" functions.
--- .../WebKit/UncountedLocalVarsChecker.cpp | 72 +++++++++++++++-- .../Analysis/Checkers/WebKit/mock-types.h | 34 +++++++- .../Checkers/WebKit/uncounted-local-vars.cpp | 77 +++++++++++++++++++ 3 files changed, 177 insertions(+), 6 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp index 5cdf047738abcb2..76a4599cc8d7883 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp @@ -48,6 +48,65 @@ bool isRefcountedStringsHack(const VarDecl *V) { return false; } +struct GuardianVisitor : public RecursiveASTVisitor { + using Base = RecursiveASTVisitor; + + const VarDecl *Guardian{nullptr}; + +public: + explicit GuardianVisitor(const VarDecl *Guardian) : Guardian(Guardian) { + assert(Guardian); + } + + bool VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) { + if (auto *VarRef = dyn_cast(BO->getLHS())) { + if (VarRef->getDecl() == Guardian) + return false; + } + } + return true; + } + + bool VisitCXXConstructExpr(const CXXConstructExpr *CE) { + if (auto *Ctor = CE->getConstructor()) { + if (Ctor->isMoveConstructor() && CE->getNumArgs() == 1) { + auto *Arg = CE->getArg(0)->IgnoreParenCasts(); + if (auto *VarRef = dyn_cast(Arg)) { + if (VarRef->getDecl() == Guardian) + return false; + } + } + } + return true; + } + + bool VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + auto MethodName = safeGetName(MCE->getMethodDecl()); + if (MethodName == "swap" || MethodName == "leakRef" || + MethodName == "releaseNonNull") { + auto *ThisArg = MCE->getImplicitObjectArgument()->IgnoreParenCasts(); + if (auto *VarRef = dyn_cast(ThisArg)) { + if (VarRef->getDecl() == Guardian) + return false; + } + } + return true; + } + + bool VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + if (OCE->isAssignmentOp()) { + 
assert(OCE->getNumArgs() == 2); + auto *ThisArg = OCE->getArg(0)->IgnoreParenCasts(); + if (auto *VarRef = dyn_cast(ThisArg)) { + if (VarRef->getDecl() == Guardian) + return false; + } + } + return true; + } +}; + bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, const VarDecl *MaybeGuardian) { assert(Guarded); @@ -81,7 +140,7 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, // We need to skip the first CompoundStmt to avoid situation when guardian is // defined in the same scope as guarded variable. - bool HaveSkippedFirstCompoundStmt = false; + const CompoundStmt *FirstCompondStmt = nullptr; for (DynTypedNodeList guardedVarAncestors = ctx.getParents(*Guarded); !guardedVarAncestors.empty(); guardedVarAncestors = ctx.getParents( @@ -90,12 +149,15 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, ) { for (auto &guardedVarAncestor : guardedVarAncestors) { if (auto *CStmtAncestor = guardedVarAncestor.get()) { - if (!HaveSkippedFirstCompoundStmt) { - HaveSkippedFirstCompoundStmt = true; + if (!FirstCompondStmt) { + FirstCompondStmt = CStmtAncestor; continue; } - if (CStmtAncestor == guardiansClosestCompStmtAncestor) - return true; + if (CStmtAncestor == guardiansClosestCompStmtAncestor) { + GuardianVisitor guardianVisitor(MaybeGuardian); + auto *GuardedScope = const_cast(FirstCompondStmt); + return guardianVisitor.TraverseCompoundStmt(GuardedScope); + } } } } diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h b/clang/test/Analysis/Checkers/WebKit/mock-types.h index 8d8a90f0afae0e1..82c79c97a83de60 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-types.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h @@ -49,7 +49,23 @@ template , typename RefDerefTra Ref() : t{} {}; Ref(T &t) : t(&RefDerefTraits::ref(t)) { } Ref(const Ref& o) : t(RefDerefTraits::refIfNotNull(PtrTraits::unwrap(o.t))) { } + Ref(Ref&& o) : t(o.leakRef()) { } ~Ref() { RefDerefTraits::derefIfNotNull(PtrTraits::exchange(t, 
nullptr)); } + Ref& operator=(T &t) { + Ref o(t); + swap(o); + return *this; + } + Ref& operator=(Ref &&o) { + Ref m(o); + swap(m); + return *this; + } + void swap(Ref& o) { + typename PtrTraits::StorageType tmp = t; + t = o.t; + o.t = tmp; + } T &get() { return *PtrTraits::unwrap(t); } T *ptr() { return PtrTraits::unwrap(t); } T *operator->() { return PtrTraits::unwrap(t); } @@ -74,11 +90,27 @@ template struct RefPtr { if (t) t->deref(); } + Ref releaseNonNull() { + Ref tmp(*t); + if (t) + t->deref(); + t = nullptr; + return tmp; + } + void swap(RefPtr& o) { + T* tmp = t; + t = o.t; + o.t = tmp; + } T *get() { return t; } T *operator->() { return t; } const T *operator->() const { return t; } T &operator*() { return *t; } - RefPtr &operator=(T *) { return *this; } + RefPtr &operator=(T *t) { + RefPtr o(t); + swap(o); + return *this; + } operator bool() const { return t; } }; diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp index 1c0df42cdda663c..d7fb689557a6fcf 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp @@ -83,6 +83,83 @@ void foo7(RefCountable* obj) { bar.obj->method(); } +void foo8(RefCountable* obj) { + RefPtr foo; + { + RefCountable *bar = foo.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + foo = nullptr; + bar->method(); + } + RefPtr baz; + { + RefCountable *bar = baz.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + baz = obj; + bar->method(); + } + foo = nullptr; + { + RefCountable *bar = foo.get(); + // No warning. It's okay to mutate RefPtr in an outer scope. 
+ bar->method(); + } + foo = obj; + { + RefCountable *bar = foo.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + foo.releaseNonNull(); + bar->method(); + } + { + RefCountable *bar = foo.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + foo = obj ? obj : nullptr; + bar->method(); + } + { + RefCountable *bar = foo->trivial() ? foo.get() : nullptr; + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + foo = nullptr; + bar->method(); + } +} + +void foo9(RefCountable& o) { + Ref guardian(o); + { + RefCountable &bar = guardian.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + guardian = o; // We don't detect that we're setting it to the same value. + bar.method(); + } + { + RefCountable *bar = guardian.ptr(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + Ref other(*bar); // We don't detect other has the same value as guardian. + guardian.swap(other); + bar->method(); + } + { + RefCountable *bar = guardian.ptr(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + Ref other(static_cast&&>(guardian)); + bar->method(); + } + { + RefCountable *bar = guardian.ptr(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + guardian.leakRef(); + bar->method(); + } + { + RefCountable *bar = guardian.ptr(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + guardian = o.trivial() ? 
o : *bar; + bar->method(); + } +} + } // namespace guardian_scopes namespace auto_keyword { From 44d0e9522a80e1301e96c4751b7572ae0c9cb4dd Mon Sep 17 00:00:00 2001 From: Akshat Oke Date: Wed, 30 Oct 2024 11:48:40 +0530 Subject: [PATCH 401/425] [CodeGen][NewPM] Port TailDuplicate pass to NPM (#113293) --- llvm/include/llvm/CodeGen/Passes.h | 4 +- llvm/include/llvm/CodeGen/TailDuplication.h | 47 +++++++++++++ llvm/include/llvm/InitializePasses.h | 4 +- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 + .../llvm/Passes/MachinePassRegistry.def | 4 +- llvm/lib/CodeGen/CodeGen.cpp | 4 +- llvm/lib/CodeGen/TailDuplication.cpp | 69 ++++++++++++++----- llvm/lib/CodeGen/TargetPassConfig.cpp | 8 +-- llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 4 +- .../CodeGen/AArch64/jump-table-duplicate.mir | 1 + .../AMDGPU/early-tailduplicator-nophis.mir | 1 + .../early-tailduplicator-terminator.mir | 1 + .../stop-tail-duplicate-cfg-intrinsic.mir | 1 + 14 files changed, 120 insertions(+), 30 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/TailDuplication.h diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index e12c1f076f133c9..d1c71fc95818c80 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -261,11 +261,11 @@ namespace llvm { /// TailDuplicate - Duplicate blocks with unconditional branches /// into tails of their predecessors. - extern char &TailDuplicateID; + extern char &TailDuplicateLegacyID; /// Duplicate blocks with unconditional branches into tails of their /// predecessors. Variant that works before register allocation. - extern char &EarlyTailDuplicateID; + extern char &EarlyTailDuplicateLegacyID; /// MachineTraceMetrics - This pass computes critical path and CPU resource /// usage in an ensemble of traces. 
diff --git a/llvm/include/llvm/CodeGen/TailDuplication.h b/llvm/include/llvm/CodeGen/TailDuplication.h new file mode 100644 index 000000000000000..687a592ccf2fbfe --- /dev/null +++ b/llvm/include/llvm/CodeGen/TailDuplication.h @@ -0,0 +1,47 @@ +//===- llvm/CodeGen/TailDuplication.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TAILDUPLICATIONPASS_H +#define LLVM_CODEGEN_TAILDUPLICATIONPASS_H + +#include "llvm/CodeGen/MBFIWrapper.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +template +class TailDuplicatePassBase : public PassInfoMixin { +private: + std::unique_ptr MBFIW; + +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; + +class EarlyTailDuplicatePass + : public TailDuplicatePassBase { +public: + MachineFunctionProperties getClearedProperties() const { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } +}; + +class TailDuplicatePass + : public TailDuplicatePassBase {}; + +} // namespace llvm + +extern template class llvm::TailDuplicatePassBase; +extern template class llvm::TailDuplicatePassBase; + +#endif // LLVM_CODEGEN_TAILDUPLICATIONPASS_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 26f5d63553c5a87..54c070401ec8a40 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -101,7 +101,7 @@ void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &); void initializeEarlyIfConverterLegacyPass(PassRegistry &); void initializeEarlyIfPredicatorPass(PassRegistry &); void initializeEarlyMachineLICMPass(PassRegistry 
&); -void initializeEarlyTailDuplicatePass(PassRegistry &); +void initializeEarlyTailDuplicateLegacyPass(PassRegistry &); void initializeEdgeBundlesPass(PassRegistry &); void initializeEHContGuardCatchretPass(PassRegistry &); void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry &); @@ -300,7 +300,7 @@ void initializeStraightLineStrengthReduceLegacyPassPass(PassRegistry &); void initializeStripDebugMachineModulePass(PassRegistry &); void initializeStructurizeCFGLegacyPassPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry &); -void initializeTailDuplicatePass(PassRegistry &); +void initializeTailDuplicateLegacyPass(PassRegistry &); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry &); void initializeTargetPassConfigPass(PassRegistry &); void initializeTargetTransformInfoWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index ad80c661147d6f5..9e95625fd1d881e 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -60,6 +60,7 @@ #include "llvm/CodeGen/SjLjEHPrepare.h" #include "llvm/CodeGen/StackColoring.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TailDuplication.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TwoAddressInstructionPass.h" #include "llvm/CodeGen/UnreachableBlockElim.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 4f32a917738c134..9d12a120ff7ac6d 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -133,6 +133,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass()) MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass()) MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass()) 
+MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass()) @@ -157,6 +158,7 @@ MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) +MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass()) MACHINE_FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) MACHINE_FUNCTION_PASS("two-address-instruction", TwoAddressInstructionPass()) MACHINE_FUNCTION_PASS("verify", MachineVerifierPass()) @@ -210,7 +212,6 @@ DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass) DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass) DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass) DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter) -DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass) DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass) DUMMY_MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass) DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) @@ -262,7 +263,6 @@ DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass) DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass) DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass) DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass) -DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass) DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass) DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass) DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass) diff 
--git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index cf5c35fe81b4c71..39fba1d0b527ef6 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -38,7 +38,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeEarlyIfConverterLegacyPass(Registry); initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); - initializeEarlyTailDuplicatePass(Registry); + initializeEarlyTailDuplicateLegacyPass(Registry); initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpLegacyPassPass(Registry); @@ -131,7 +131,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeStripDebugMachineModulePass(Registry); - initializeTailDuplicatePass(Registry); + initializeTailDuplicateLegacyPass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionLegacyPassPass(Registry); initializeTypePromotionLegacyPass(Registry); diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index 25f20d9c899bb07..b698ca675b65e2d 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -12,13 +12,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TailDuplication.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/IR/Analysis.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" @@ -29,13 +32,13 @@ using namespace llvm; namespace { 
-class TailDuplicateBase : public MachineFunctionPass { +class TailDuplicateBaseLegacy : public MachineFunctionPass { TailDuplicator Duplicator; std::unique_ptr MBFIW; bool PreRegAlloc; public: - TailDuplicateBase(char &PassID, bool PreRegAlloc) - : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} + TailDuplicateBaseLegacy(char &PassID, bool PreRegAlloc) + : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -47,19 +50,19 @@ class TailDuplicateBase : public MachineFunctionPass { } }; -class TailDuplicate : public TailDuplicateBase { +class TailDuplicateLegacy : public TailDuplicateBaseLegacy { public: static char ID; - TailDuplicate() : TailDuplicateBase(ID, false) { - initializeTailDuplicatePass(*PassRegistry::getPassRegistry()); + TailDuplicateLegacy() : TailDuplicateBaseLegacy(ID, false) { + initializeTailDuplicateLegacyPass(*PassRegistry::getPassRegistry()); } }; -class EarlyTailDuplicate : public TailDuplicateBase { +class EarlyTailDuplicateLegacy : public TailDuplicateBaseLegacy { public: static char ID; - EarlyTailDuplicate() : TailDuplicateBase(ID, true) { - initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry()); + EarlyTailDuplicateLegacy() : TailDuplicateBaseLegacy(ID, true) { + initializeEarlyTailDuplicateLegacyPass(*PassRegistry::getPassRegistry()); } MachineFunctionProperties getClearedProperties() const override { @@ -70,17 +73,18 @@ class EarlyTailDuplicate : public TailDuplicateBase { } // end anonymous namespace -char TailDuplicate::ID; -char EarlyTailDuplicate::ID; +char TailDuplicateLegacy::ID; +char EarlyTailDuplicateLegacy::ID; -char &llvm::TailDuplicateID = TailDuplicate::ID; -char &llvm::EarlyTailDuplicateID = EarlyTailDuplicate::ID; +char &llvm::TailDuplicateLegacyID = TailDuplicateLegacy::ID; +char &llvm::EarlyTailDuplicateLegacyID = EarlyTailDuplicateLegacy::ID; -INITIALIZE_PASS(TailDuplicate, DEBUG_TYPE, "Tail Duplication", false, false) 
-INITIALIZE_PASS(EarlyTailDuplicate, "early-tailduplication", +INITIALIZE_PASS(TailDuplicateLegacy, DEBUG_TYPE, "Tail Duplication", false, + false) +INITIALIZE_PASS(EarlyTailDuplicateLegacy, "early-tailduplication", "Early Tail Duplication", false, false) -bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { +bool TailDuplicateBaseLegacy::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -100,3 +104,36 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } + +template +PreservedAnalyses TailDuplicatePassBase::run( + MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(static_cast(*this), MF); + + if (MF.getFunction().hasOptNone()) + return PreservedAnalyses::all(); + + auto *MBPI = &MFAM.getResult(MF); + auto *PSI = MFAM.getResult(MF) + .getCachedResult( + *MF.getFunction().getParent()); + auto *MBFI = (PSI && PSI->hasProfileSummary() + ? &MFAM.getResult(MF) + : nullptr); + if (MBFI) + MBFIW = std::make_unique(*MBFI); + + TailDuplicator Duplicator; + Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? 
MBFIW.get() : nullptr, PSI, + /*LayoutMode=*/false); + bool MadeChange = false; + while (Duplicator.tailDuplicateBlocks()) + MadeChange = true; + + if (!MadeChange) + return PreservedAnalyses::all(); + return getMachineFunctionPassPreservedAnalyses(); +} + +template class llvm::TailDuplicatePassBase; +template class llvm::TailDuplicatePassBase; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 12225c9946e9fc7..aff74104006e5a7 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -290,10 +290,10 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &BranchFolderPassID) return applyDisable(TargetID, DisableBranchFold); - if (StandardID == &TailDuplicateID) + if (StandardID == &TailDuplicateLegacyID) return applyDisable(TargetID, DisableTailDuplicate); - if (StandardID == &EarlyTailDuplicateID) + if (StandardID == &EarlyTailDuplicateLegacyID) return applyDisable(TargetID, DisableEarlyTailDup); if (StandardID == &MachineBlockPlacementID) @@ -1279,7 +1279,7 @@ void TargetPassConfig::addMachinePasses() { /// Add passes that optimize machine instructions in SSA form. void TargetPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. - addPass(&EarlyTailDuplicateID); + addPass(&EarlyTailDuplicateLegacyID); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. @@ -1507,7 +1507,7 @@ void TargetPassConfig::addMachineLateOptimization() { // performance for targets that require Structured Control Flow. // In addition it can also make CFG irreducible. Thus we disable it. if (!TM->requiresStructuredCFG()) - addPass(&TailDuplicateID); + addPass(&TailDuplicateLegacyID); // Copy propagation. 
addPass(&MachineCopyPropagationID); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d1f75dfb5350a0c..a879918005cad8f 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -125,6 +125,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackColoring.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TailDuplication.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TwoAddressInstructionPass.h" #include "llvm/CodeGen/TypePromotion.h" diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 1d6f39b290536af..a5a147da8da1c5b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -332,7 +332,7 @@ void NVPTXPassConfig::addIRPasses() { disablePass(&PrologEpilogCodeInserterID); disablePass(&MachineLateInstrsCleanupID); disablePass(&MachineCopyPropagationID); - disablePass(&TailDuplicateID); + disablePass(&TailDuplicateLegacyID); disablePass(&StackMapLivenessID); disablePass(&PostRAMachineSinkingID); disablePass(&PostRASchedulerID); @@ -461,7 +461,7 @@ void NVPTXPassConfig::addOptimizedRegAlloc() { void NVPTXPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. 
- if (addPass(&EarlyTailDuplicateID)) + if (addPass(&EarlyTailDuplicateLegacyID)) printAndVerify("After Pre-RegAlloc TailDuplicate"); // Optimize PHIs before DCE: removing dead PHI cycles may make more diff --git a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir index 0963ecbb123115d..a2532a854923f53 100644 --- a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir +++ b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir @@ -1,4 +1,5 @@ # RUN: llc -run-pass=tailduplication -tail-dup-size=4 %s -o - | FileCheck %s +# RUN: llc -passes=tailduplication -tail-dup-size=4 %s -o - | FileCheck %s # JumpTableDest32 uses an `adr` to a temporary label (itself). If duplicated we # cannot guarantee reachability for any uses after the first. diff --git a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir index 2cb84c7ef4637d5..072cc3a60a60cae 100644 --- a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir +++ b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-tailduplication -o - %s | FileCheck %s # There are no phis in this testcase. 
Early tail duplication introduces them, # so the NoPHIs property needs to be cleared to avoid verifier errors diff --git a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir index 41c6906b3c85ad2..8132fa4df89eee4 100644 --- a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir +++ b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-tailduplication -o - %s | FileCheck %s # Early tail duplication should not merge bb.6 into bb.5, adding a # non-terminator (S_SLEEP) after the terminator S_MOV_B32_term. diff --git a/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir b/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir index c23c8900096fba1..be1a8aceb8c9032 100644 --- a/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir +++ b/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes=early-tailduplication -o - %s | FileCheck %s --- name: stop_duplicate_cfg_intrinsic From cad09404cc804dd35d2f3b742d1d6efb6d5a9449 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Wed, 30 Oct 2024 14:34:19 +0800 Subject: [PATCH 402/425] [sema] enhance error handling for compound stmt body in `StmtExpr` (#113760) Mark the whole StmtExpr invalid when the last statement in compound statement is invalid. Because the last statement need to do copy initialization, it causes subsequent errors to simply ignore last invalid statement. 
Fixed: #113468 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Parse/ParseStmt.cpp | 9 +++++++++ clang/test/SemaCXX/gh113468.cpp | 12 ++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 clang/test/SemaCXX/gh113468.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a39ffc8366dda41..1837707b8caecb9 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -470,6 +470,7 @@ Bug Fixes in This Version - The warning emitted for an unsupported register variable type now points to the unsupported type instead of the ``register`` keyword (#GH109776). - Fixed a crash when emit ctor for global variant with flexible array init (#GH113187). +- Fixed a crash when GNU statement expression contains invalid statement (#GH113468). Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index 7d727efb228731c..6470e55e521add7 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -1243,6 +1243,7 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { ParsedStmtContext::Compound | (isStmtExpr ? ParsedStmtContext::InStmtExpr : ParsedStmtContext()); + bool LastIsError = false; while (!tryParseMisplacedModuleImport() && Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { if (Tok.is(tok::annot_pragma_unused)) { @@ -1299,7 +1300,15 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { if (R.isUsable()) Stmts.push_back(R.get()); + LastIsError = R.isInvalid(); } + // StmtExpr needs to do copy initialization for last statement. + // If last statement is invalid, the last statement in `Stmts` will be + // incorrect. Then the whole compound statement should also be marked as + // invalid to prevent subsequent errors. 
+ if (isStmtExpr && LastIsError && !Stmts.empty()) + return StmtError(); + // Warn the user that using option `-ffp-eval-method=source` on a // 32-bit target and feature `sse` disabled, or using // `pragma clang fp eval_method=source` and feature `sse` disabled, is not diff --git a/clang/test/SemaCXX/gh113468.cpp b/clang/test/SemaCXX/gh113468.cpp new file mode 100644 index 000000000000000..94551986b0efaab --- /dev/null +++ b/clang/test/SemaCXX/gh113468.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s + +constexpr int expr() { + if (({ + int f; + f = 0; + if (f) + break; // expected-error {{'break' statement not in loop or switch statement}} + })) + return 2; + return 1; +} From 5df84a75351d0e9c3e20d50ac1047c937e3b8e88 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Wed, 30 Oct 2024 14:37:04 +0800 Subject: [PATCH 403/425] [NFC] clean space in clang release note (#114188) --- clang/docs/ReleaseNotes.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1837707b8caecb9..6085352dfafe6b3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -140,7 +140,7 @@ C++ Specific Potentially Breaking Changes unsigned operator""_udl_name(unsigned long long); - Clang will now produce an error diagnostic when [[clang::lifetimebound]] is - applied on a parameter of a function that returns void. This was previously + applied on a parameter of a function that returns void. This was previously ignored and had no effect. (#GH107556) .. code-block:: c++ @@ -469,7 +469,7 @@ Bug Fixes in This Version - Fixed a crash using ``__array_rank`` on 64-bit targets. (#GH113044). - The warning emitted for an unsupported register variable type now points to the unsupported type instead of the ``register`` keyword (#GH109776). -- Fixed a crash when emit ctor for global variant with flexible array init (#GH113187). 
+- Fixed a crash when emit ctor for global variant with flexible array init (#GH113187). - Fixed a crash when GNU statement expression contains invalid statement (#GH113468). Bug Fixes to Compiler Builtins From dc56a86b96d77a93f761995d50f7b2f112856311 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Wed, 30 Oct 2024 07:32:05 +0000 Subject: [PATCH 404/425] [clang] Fix 71315698c9 in presence of incomplete types (#114095) Incomplete types are not considered trivially copyable by clang but we don't want to warn about invalid argument for memcpy / memset in that case because we cannot prove they are not Trivially Copyable. --- clang/lib/Sema/SemaChecking.cpp | 11 ++++++++--- clang/test/SemaCXX/constexpr-string.cpp | 2 -- clang/test/SemaCXX/warn-memaccess.cpp | 25 +++++++++++++++++++++---- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 3308b898a5b68f4..dae271c1ff50014 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -8900,7 +8900,12 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, << Call->getCallee()->getSourceRange()); else if (const auto *RT = PointeeTy->getAs()) { - bool IsTriviallyCopyableCXXRecord = + // FIXME: Do not consider incomplete types even though they may be + // completed later. GCC does not diagnose such code, but we may want to + // consider diagnosing it in the future, perhaps under a different, but + // related, diagnostic group. 
+ bool MayBeTriviallyCopyableCXXRecord = + RT->isIncompleteType() || RT->desugar().isTriviallyCopyableType(Context); if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) && @@ -8910,7 +8915,7 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, << ArgIdx << FnName << PointeeTy << 0); SearchNonTrivialToInitializeField::diag(PointeeTy, Dest, *this); } else if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) && - !IsTriviallyCopyableCXXRecord && ArgIdx == 0) { + !MayBeTriviallyCopyableCXXRecord && ArgIdx == 0) { // FIXME: Limiting this warning to dest argument until we decide // whether it's valid for source argument too. DiagRuntimeBehavior(Dest->getExprLoc(), Dest, @@ -8923,7 +8928,7 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, << ArgIdx << FnName << PointeeTy << 1); SearchNonTrivialToCopyField::diag(PointeeTy, Dest, *this); } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) && - !IsTriviallyCopyableCXXRecord && ArgIdx == 0) { + !MayBeTriviallyCopyableCXXRecord && ArgIdx == 0) { // FIXME: Limiting this warning to dest argument until we decide // whether it's valid for source argument too. 
DiagRuntimeBehavior(Dest->getExprLoc(), Dest, diff --git a/clang/test/SemaCXX/constexpr-string.cpp b/clang/test/SemaCXX/constexpr-string.cpp index 5448365489a514d..c456740ef7551f7 100644 --- a/clang/test/SemaCXX/constexpr-string.cpp +++ b/clang/test/SemaCXX/constexpr-string.cpp @@ -670,8 +670,6 @@ namespace MemcpyEtc { constexpr bool test_address_of_incomplete_struct_type() { // expected-error {{never produces a constant}} struct Incomplete; extern Incomplete x, y; - // expected-warning@+2 {{first argument in call to '__builtin_memcpy' is a pointer to non-trivially copyable type 'Incomplete'}} - // expected-note@+1 {{explicitly cast the pointer to silence this warning}} __builtin_memcpy(&x, &x, 4); // expected-note@-1 2{{cannot constant evaluate 'memcpy' between objects of incomplete type 'Incomplete'}} return true; diff --git a/clang/test/SemaCXX/warn-memaccess.cpp b/clang/test/SemaCXX/warn-memaccess.cpp index b4b7f6a6905b23e..070b44891a91aa4 100644 --- a/clang/test/SemaCXX/warn-memaccess.cpp +++ b/clang/test/SemaCXX/warn-memaccess.cpp @@ -7,12 +7,17 @@ extern "C" void *memcpy(void *s1, const void *s2, unsigned n); class TriviallyCopyable {}; class NonTriviallyCopyable { NonTriviallyCopyable(const NonTriviallyCopyable&);}; +struct Incomplete; void test_bzero(TriviallyCopyable* tc, - NonTriviallyCopyable *ntc) { + NonTriviallyCopyable *ntc, + Incomplete* i) { // OK bzero(tc, sizeof(*tc)); + // OK + bzero(i, 10); + // expected-warning@+2{{first argument in call to 'bzero' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} // expected-note@+1{{explicitly cast the pointer to silence this warning}} bzero(ntc, sizeof(*ntc)); @@ -22,10 +27,14 @@ void test_bzero(TriviallyCopyable* tc, } void test_memset(TriviallyCopyable* tc, - NonTriviallyCopyable *ntc) { + NonTriviallyCopyable *ntc, + Incomplete* i) { // OK memset(tc, 0, sizeof(*tc)); + // OK + memset(i, 0, 10); + // expected-warning@+2{{first argument in call to 'memset' is a pointer to non-trivially 
copyable type 'NonTriviallyCopyable'}} // expected-note@+1{{explicitly cast the pointer to silence this warning}} memset(ntc, 0, sizeof(*ntc)); @@ -36,10 +45,14 @@ void test_memset(TriviallyCopyable* tc, void test_memcpy(TriviallyCopyable* tc0, TriviallyCopyable* tc1, - NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1) { + NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1, + Incomplete *i0, Incomplete *i1) { // OK memcpy(tc0, tc1, sizeof(*tc0)); + // OK + memcpy(i0, i1, 10); + // expected-warning@+2{{first argument in call to 'memcpy' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} // expected-note@+1{{explicitly cast the pointer to silence this warning}} memcpy(ntc0, ntc1, sizeof(*ntc0)); @@ -52,10 +65,14 @@ void test_memcpy(TriviallyCopyable* tc0, TriviallyCopyable* tc1, } void test_memmove(TriviallyCopyable* tc0, TriviallyCopyable* tc1, - NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1) { + NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1, + Incomplete *i0, Incomplete *i1) { // OK memmove(tc0, tc1, sizeof(*tc0)); + // OK + memmove(i0, i1, 10); + // expected-warning@+2{{first argument in call to 'memmove' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} // expected-note@+1{{explicitly cast the pointer to silence this warning}} memmove(ntc0, ntc1, sizeof(*ntc0)); From 362273d1435c0cc104418f88b0140d0388e9ee22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nagy-Egri=20M=C3=A1t=C3=A9=20Ferenc?= Date: Wed, 30 Oct 2024 08:51:11 +0100 Subject: [PATCH 405/425] [clang-format] Fix path expansion inside git-clang-format.bat (#114078) The trampoline script used on Windows (due to the absence of shebang support) doesn't properly expand the path to the Python script, as it leaves out the drive letter. 
Functionally equivalent reproducer in action ``` PS C:\Users\mate> gc (gcm git-clang-formatish.bat).Source @ECHO OFF echo "%~pn0" %* PS C:\Users\mate> git-clang-formatish "\Users\mate\git-clang-formatish" ``` Adding `d` to the variable modifiers [as per the docs](https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/for) the drive letter is added. Even in the magical cases when it works. (I couldn't reproduce, but I suspect it's only tested from some bash/cygwin variant, where the path becomes `/c/Program Files/...`, but the drive letter is needed. Without it, I also observed cases when used via `git clang-format` (without the inital dash) it tries to infer the drive letter based on the current working directory. In that case it fails to find `D:\Program Files\LLVM\bin\clang-format.exe`, which naturally fails, because `Program Files` is on `C:`) --- clang/tools/clang-format/git-clang-format.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/tools/clang-format/git-clang-format.bat b/clang/tools/clang-format/git-clang-format.bat index 19c82d8a04132b6..a40276e63c5848b 100644 --- a/clang/tools/clang-format/git-clang-format.bat +++ b/clang/tools/clang-format/git-clang-format.bat @@ -1 +1 @@ -py -3 "%~pn0" %* +py -3 "%~dpn0" %* From 9c8dab018dee3143c28a7d7f5fdb32385da36101 Mon Sep 17 00:00:00 2001 From: Boaz Brickner Date: Wed, 30 Oct 2024 08:59:49 +0100 Subject: [PATCH 406/425] [clang] Update the lifetimebound example with up-to-date expected warning and change the sample code to be a fully working example (#113437) Tested the code: https://godbolt.org/z/n5xcq65YM Tested the generated documentation: ![BruDQ2UkTXHA9PE](https://github.com/user-attachments/assets/cf527d1a-ef3b-41f2-84c2-4ca38af16d2d) --- clang/include/clang/Basic/AttrDocs.td | 28 +++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 
7a130c434e73ced..fbbfc4acdf391ef 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3702,20 +3702,32 @@ user-declared functions. For example: .. code-block:: c++ + #include + #include + + using namespace std::literals; + // Returns m[key] if key is present, or default_value if not. template const U &get_or_default(const std::map &m [[clang::lifetimebound]], const T &key, /* note, not lifetimebound */ - const U &default_value [[clang::lifetimebound]]); + const U &default_value [[clang::lifetimebound]]) { + if (auto iter = m.find(key); iter != m.end()) return iter->second; + else return default_value; + } - std::map m; - // warning: temporary "bar"s that might be bound to local reference 'val' - // will be destroyed at the end of the full-expression - const std::string &val = get_or_default(m, "foo"s, "bar"s); + int main() { + std::map m; + // warning: temporary bound to local reference 'val1' will be destroyed + // at the end of the full-expression + const std::string &val1 = get_or_default(m, "foo"s, "bar"s); - // No warning in this case. - std::string def_val = "bar"s; - const std::string &val = get_or_default(m, "foo"s, def_val); + // No warning in this case. + std::string def_val = "bar"s; + const std::string &val2 = get_or_default(m, "foo"s, def_val); + + return 0; + } The attribute can be applied to the implicit ``this`` parameter of a member function by writing the attribute after the function type: From f3584222682bd64daa89cbfe41c071c6bfc2347a Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 30 Oct 2024 08:10:35 +0000 Subject: [PATCH 407/425] [Attributor] Add nofpclass test for phi+select recurrences. 
NFC --- .../Attributor/nofpclass-phiselect.ll | 176 ++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 llvm/test/Transforms/Attributor/nofpclass-phiselect.ll diff --git a/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll b/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll new file mode 100644 index 000000000000000..6635280bc436039 --- /dev/null +++ b/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -S < %s | FileCheck %s + +define float @phi_select(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) { +; CHECK-LABEL: define float @phi_select +; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[SELECT]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %select, %loop ] + %select = select i1 %c, float %phi, float %arg + br i1 %c, label %loop, label %exit + +exit: + ret float %select +} + +define float @phi_select_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) { +; CHECK-LABEL: define float @phi_select_onlybase +; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]] +; CHECK-NEXT: br i1 [[C]], label 
[[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[SELECT]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %select, %loop ] + %select = select i1 %c, float %phi, float %arg + br i1 %c, label %loop, label %exit + +exit: + ret float %select +} + +define float @phi_select_onlyarg(i1 %c, float %base, float nofpclass(inf) %arg) { +; CHECK-LABEL: define float @phi_select_onlyarg +; CHECK-SAME: (i1 [[C:%.*]], float [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[SELECT]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %select, %loop ] + %select = select i1 %c, float %phi, float %arg + br i1 %c, label %loop, label %exit + +exit: + ret float %select +} + +define float @phi_phi(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) { +; CHECK-LABEL: define float @phi_phi +; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ] +; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]] +; CHECK: exit1: +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[PHI2]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ] + br label %inner + +inner: + %phi2 = phi float [ %phi, %loop ], [ 
%arg, %inner ] + br i1 %c, label %inner, label %exit1 + +exit1: + br i1 %c, label %loop, label %exit + +exit: + ret float %phi2 +} + +define float @phi_phi_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) { +; CHECK-LABEL: define float @phi_phi_onlybase +; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ] +; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]] +; CHECK: exit1: +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[PHI2]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ] + br label %inner + +inner: + %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ] + br i1 %c, label %inner, label %exit1 + +exit1: + br i1 %c, label %loop, label %exit + +exit: + ret float %phi2 +} + +define float @phi_phi_onlyarg(i1 %c, float %base, float nofpclass(inf) %arg) { +; CHECK-LABEL: define float @phi_phi_onlyarg +; CHECK-SAME: (i1 [[C:%.*]], float [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ] +; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]] +; CHECK: exit1: +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[PHI2]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ] + br label %inner + 
+inner: + %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ] + br i1 %c, label %inner, label %exit1 + +exit1: + br i1 %c, label %loop, label %exit + +exit: + ret float %phi2 +} From e61a7dc256bd530a0b9551e2732e5b5b77e2cd1e Mon Sep 17 00:00:00 2001 From: Mahesh-Attarde <145317060+mahesh-attarde@users.noreply.github.com> Date: Wed, 30 Oct 2024 01:17:25 -0700 Subject: [PATCH 408/425] [X86][AVX512] Use comx for compare (#113567) We added AVX10.2 COMEF ISA in LLVM, This does not optimize correctly in scenario mentioned below. Summary Input ``` define i1 @oeq(float %x, float %y) { %1 = fcmp oeq float %x, %y ret i1 %1 }define i1 @une(float %x, float %y) { %1 = fcmp une float %x, %y ret i1 %1 }define i1 @ogt(float %x, float %y) { %1 = fcmp ogt float %x, %y ret i1 %1 } // Prior AVX10.2, default code generation oeq: # @oeq cmpeqss xmm0, xmm1 movd eax, xmm0 and eax, 1 ret une: # @une cmpneqss xmm0, xmm1 movd eax, xmm0 and eax, 1 ret ogt: # @ogt ucomiss xmm0, xmm1 seta al ret ``` This patch will remove `cmpeqss` and `cmpneqss`. For complete transform check unit test. Continuing on what PR https://github.com/llvm/llvm-project/pull/113098 added Earlier Legalization and combine expanded `setcc oeq:ch` node into `and` and `setcc eq` , `setcc o`. 
From suggestions in community new internal transform ``` Optimized type-legalized selection DAG: %bb.0 'hoeq:' SelectionDAG has 11 nodes: t0: ch,glue = EntryToken t2: f16,ch = CopyFromReg t0, Register:f16 %0 t4: f16,ch = CopyFromReg t0, Register:f16 %1 t14: i8 = setcc t2, t4, setoeq:ch t10: ch,glue = CopyToReg t0, Register:i8 $al, t14 t11: ch = X86ISD::RET_GLUE t10, TargetConstant:i32<0>, Register:i8 $al, t10:1 Optimized legalized selection DAG: %bb.0 'hoeq:' SelectionDAG has 12 nodes: t0: ch,glue = EntryToken t2: f16,ch = CopyFromReg t0, Register:f16 %0 t4: f16,ch = CopyFromReg t0, Register:f16 %1 t15: i32 = X86ISD::UCOMX t2, t4 t17: i8 = X86ISD::SETCC TargetConstant:i8<4>, t15 t10: ch,glue = CopyToReg t0, Register:i8 $al, t17 t11: ch = X86ISD::RET_GLUE t10, TargetConstant:i32<0>, Register:i8 $al, t10:1 ``` Earlier transform is mentioned here https://github.com/llvm/llvm-project/pull/113098#discussion_r1810307663 --------- Co-authored-by: mattarde --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++ llvm/lib/Target/X86/X86InstrAVX10.td | 27 +++ llvm/test/CodeGen/X86/avx10_2-cmp.ll | 237 ++++++++++++++++++++++++ llvm/test/TableGen/x86-fold-tables.inc | 3 + 4 files changed, 278 insertions(+) create mode 100644 llvm/test/CodeGen/X86/avx10_2-cmp.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1c790f3813b7a47..34bc5d76c15ceaa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2440,6 +2440,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::v32bf16, Legal); setOperationAction(ISD::SETCC, MVT::v32bf16, Custom); } + for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) { + setCondCodeAction(ISD::SETOEQ, VT, Custom); + setCondCodeAction(ISD::SETUNE, VT, Custom); + } } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { @@ -24072,6 +24076,13 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; } + if (Subtarget.hasAVX10_2()) { + if (CC == ISD::SETOEQ || CC == ISD::SETUNE) { + auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE); + return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1), + dl, DAG); + } + } // Handle floating point. X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG); if (CondCode == X86::COND_INVALID) diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 4d64eb776e09cef..0301c07dfb540b7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_ //------------------------------------------------- // AVX10 COMEF instructions //------------------------------------------------- +multiclass avx10_com_ef Opc, RegisterClass RC, ValueType VT, + SDPatternOperator OpNode, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + Domain d, X86FoldableSchedWrite sched = WriteFComX>{ + let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in { + def rr : AVX512, + EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; + let mayLoad = 1 in { + def rm : AVX512, + EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; + } + } +} + multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, string OpcodeStr, Domain d, @@ -1564,6 +1582,15 @@ multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, } let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { + defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, + "vucomxsd", f64mem, loadf64, SSEPackedDouble>, + TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; + defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, + "vucomxsh", f16mem, loadf16, SSEPackedSingle>, + T_MAP5, XD, EVEX_CD8<16, CD8VT1>; + defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, + 
"vucomxss", f32mem, loadf32, SSEPackedSingle>, + TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, "vcomxsd", SSEPackedDouble>, TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll new file mode 100644 index 000000000000000..de0bec7ea2695a8 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86 + +define i1 @hoeq(half %x, half %y) { +; X64-LABEL: hoeq: +; X64: # %bb.0: +; X64-NEXT: vucomxsh %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: hoeq: +; X86: # %bb.0: +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq half %x, %y + ret i1 %1 +} + +define i1 @hune(half %x, half %y) { +; X64-LABEL: hune: +; X64: # %bb.0: +; X64-NEXT: vucomxsh %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: hune: +; X86: # %bb.0: +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une half %x, %y + ret i1 %1 +} + +define i1 @hoeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: hoeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vucomxsh (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: hoeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; 
X86-NEXT: vucomxsh (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load half, ptr %xp + %y = load half, ptr %yp + %1 = fcmp oeq half %x, %y + ret i1 %1 +} + +define i1 @hune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: hune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vucomxsh (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: hune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load half, ptr %xp + %y = load half, ptr %yp + %1 = fcmp une half %x, %y + ret i1 %1 +} + +define i1 @foeq(float %x, float %y) { +; X64-LABEL: foeq: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: foeq: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq float %x, %y + ret i1 %1 +} + +define i1 @fune(float %x, float %y) { +; X64-LABEL: fune: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: fune: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une float %x, %y + ret i1 %1 +} + +define i1 @foeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: foeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vucomxss (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: foeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss (%eax), %xmm0 +; X86-NEXT: 
sete %al +; X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp oeq float %x, %y + ret i1 %1 +} + +define i1 @fune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: fune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vucomxss (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: fune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp une float %x, %y + ret i1 %1 +} + +define i1 @doeq(double %x, double %y) { +; X64-LABEL: doeq: +; X64: # %bb.0: +; X64-NEXT: vucomxsd %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: doeq: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq double %x, %y + ret i1 %1 +} + +define i1 @dune(double %x, double %y) { +; X64-LABEL: dune: +; X64: # %bb.0: +; X64-NEXT: vucomxsd %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: dune: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une double %x, %y + ret i1 %1 +} + +define i1 @doeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: doeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: vucomxsd (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: doeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load double, ptr %xp + %y = load double, ptr %yp + %1 = fcmp oeq double %x, %y + 
ret i1 %1 +} + +define i1 @dune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: dune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: vucomxsd (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: dune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load double, ptr %xp + %y = load double, ptr %yp + %1 = fcmp une double %x, %y + ret i1 %1 +} diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 43c206fa0af698e..21f3c8593a710b0 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1959,8 +1959,11 @@ static const X86FoldTableEntry Table1[] = { {X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE}, {X86::VUCOMISSrr, X86::VUCOMISSrm, 0}, {X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSDZrr, X86::VUCOMXSDZrm, 0}, {X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSHZrr, X86::VUCOMXSHZrm, 0}, {X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0}, {X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE}, {X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0}, {X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0}, From 259eaa6878ead1e2e7ef572a874dc3d885c1899b Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 30 Oct 2024 17:27:04 +0800 Subject: [PATCH 409/425] [C++20] [Modules] Fix the duplicated static initializer problem (#114193) Reproducer: ``` //--- a.cppm export module a; int func(); static int a = func(); //--- a.cpp import a; ``` The `func()` should only execute once. However, before this patch we will somehow import `static int a` from a.cppm incorrectly and initialize that again. This is super bad and can introduce serious runtime behaviors. 
And also surprisingly, it looks like the root cause of the problem is simply some oversight choosing APIs. --- clang/lib/CodeGen/CodeGenModule.cpp | 4 ++-- clang/test/Modules/static-initializer.cppm | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 clang/test/Modules/static-initializer.cppm diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 2bcca5e85bdfeb9..ba376f9ecfacde7 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -7146,8 +7146,8 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { // For C++ standard modules we are done - we will call the module // initializer for imported modules, and that will likewise call those for // any imports it has. - if (CXX20ModuleInits && Import->getImportedOwningModule() && - !Import->getImportedOwningModule()->isModuleMapModule()) + if (CXX20ModuleInits && Import->getImportedModule() && + Import->getImportedModule()->isNamedModule()) break; // For clang C++ module map modules the initializers for sub-modules are diff --git a/clang/test/Modules/static-initializer.cppm b/clang/test/Modules/static-initializer.cppm new file mode 100644 index 000000000000000..10d4854ee67fa62 --- /dev/null +++ b/clang/test/Modules/static-initializer.cppm @@ -0,0 +1,18 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cpp -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/a.cpp + +//--- a.cppm +export module a; +int func(); +static int a = func(); + +//--- a.cpp +import a; + +// CHECK-NOT: internal global +// CHECK-NOT: __cxx_global_var_init + From e8b95a02bff8498c888ed5e85d0197ec82b95cd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20Brku=C5=A1anin?= Date: Wed, 30 Oct 2024 10:45:33 +0100 Subject: [PATCH 410/425] [AMDGPU][MC][NFC] 
Add more VIMAGE encoding tests (#114054) These are primarily meant to test disassembler and that no more than one variant per instruction is in DisassemblerTables as that can cause confusion when decoding v0 (vgpr0) whose value when encoded is 0. --- llvm/test/MC/AMDGPU/gfx12_asm_vimage.s | 24 +++++++++++++++++++ .../Disassembler/AMDGPU/gfx12_dasm_vimage.txt | 24 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s index 196d75db4260528..8bf9b92e8d1d8d7 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s @@ -158,6 +158,12 @@ image_load v[0:2], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY th:TH_LO image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D // GFX12: encoding: [0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00] +image_load v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D +// GFX12: encoding: [0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] + +image_load v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX12: encoding: [0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] + image_load_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] @@ -408,6 +414,12 @@ image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_BYPASS scope image_store v[1:4], [v2, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D // GFX12: encoding: [0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +image_store v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D +// GFX12: encoding: [0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] + +image_store v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX12: encoding: [0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] + image_store_mip v[252:255], [v0, v1], 
s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] @@ -568,6 +580,12 @@ image_atomic_swap v[254:255], [v4, v5], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_M image_atomic_swap v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D // GFX12: encoding: [0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +image_atomic_swap v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D +// GFX12: encoding: [0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] + +image_atomic_swap v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX12: encoding: [0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] + image_atomic_cmpswap v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] @@ -625,6 +643,12 @@ image_atomic_add_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_N image_atomic_add_uint v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D // GFX12: encoding: [0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +image_atomic_add_uint v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D +// GFX12: encoding: [0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] + +image_atomic_add_uint v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX12: encoding: [0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] + image_atomic_sub_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt index 08e9bef8cf67851..233c2e1b9d083b8 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt @@ -160,6 +160,12 @@ # GFX12: image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; 
encoding: [0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00] 0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00 +# GFX12: image_load v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] +0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00 + +# GFX12: image_load v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] +0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00 + # GFX12: image_load_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] 0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00 @@ -409,6 +415,12 @@ # GFX12: image_store v[1:4], [v2, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] 0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 +# GFX12: image_store v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] +0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00 + +# GFX12: image_store v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] +0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00 + # GFX12: image_store_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] 0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00 @@ -568,6 +580,12 @@ # GFX12: image_atomic_swap v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] 0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 +# GFX12: image_atomic_swap v1, [v2, v1, v0], s[4:11] dmask:0x1 
dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] +0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00 + +# GFX12: image_atomic_swap v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] +0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00 + # GFX12: image_atomic_cmpswap v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 @@ -625,6 +643,12 @@ # GFX12: image_atomic_add_uint v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] 0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 +# GFX12: image_atomic_add_uint v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] +0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00 + +# GFX12: image_atomic_add_uint v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] +0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00 + # GFX12: image_atomic_sub_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 From 652988b65805b23f228db34adfff068cffd517cc Mon Sep 17 00:00:00 2001 From: Abid Qadeer Date: Wed, 30 Oct 2024 09:52:56 +0000 Subject: [PATCH 411/425] [flang][debug] Support TupleType. (#113917) Handling is similar to RecordType with following differences: 1. No check for cyclic references 2. No extra processing for lower bounds of array members. 3. No line information as TupleType is a lowering artefact and does not really represent an entity in the code. 
--- .../Transforms/DebugTypeGenerator.cpp | 61 ++++++++++++++++--- .../Optimizer/Transforms/DebugTypeGenerator.h | 6 ++ flang/test/Transforms/debug-tuple-type.fir | 15 +++++ 3 files changed, 73 insertions(+), 9 deletions(-) create mode 100644 flang/test/Transforms/debug-tuple-type.fir diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 8e516734a908790..a070c87137fa16e 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -271,6 +271,19 @@ static bool canCacheThisType(mlir::LLVM::DICompositeTypeAttr comTy) { return true; } +std::pair +DebugTypeGenerator::getFieldSizeAndAlign(mlir::Type fieldTy) { + mlir::Type llvmTy; + if (auto boxTy = mlir::dyn_cast_or_null(fieldTy)) + llvmTy = llvmTypeConverter.convertBoxTypeAsStruct(boxTy, getBoxRank(boxTy)); + else + llvmTy = llvmTypeConverter.convertType(fieldTy); + + uint64_t byteSize = dataLayout->getTypeSize(llvmTy); + unsigned short byteAlign = dataLayout->getTypeABIAlignment(llvmTy); + return std::pair{byteSize, byteAlign}; +} + mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( fir::RecordType Ty, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { @@ -303,15 +316,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( mlir::IntegerType intTy = mlir::IntegerType::get(context, 64); std::uint64_t offset = 0; for (auto [fieldName, fieldTy] : Ty.getTypeList()) { - mlir::Type llvmTy; - if (auto boxTy = mlir::dyn_cast_or_null(fieldTy)) - llvmTy = - llvmTypeConverter.convertBoxTypeAsStruct(boxTy, getBoxRank(boxTy)); - else - llvmTy = llvmTypeConverter.convertType(fieldTy); - - uint64_t byteSize = dataLayout->getTypeSize(llvmTy); - unsigned short byteAlign = dataLayout->getTypeABIAlignment(llvmTy); + auto [byteSize, byteAlign] = getFieldSizeAndAlign(fieldTy); std::optional> lowerBounds = 
fir::getComponentLowerBoundsIfNonDefault(Ty, fieldName, module, symbolTable); @@ -368,6 +373,42 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( return finalAttr; } +mlir::LLVM::DITypeAttr DebugTypeGenerator::convertTupleType( + mlir::TupleType Ty, mlir::LLVM::DIFileAttr fileAttr, + mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { + // Check if this type has already been converted. + auto iter = typeCache.find(Ty); + if (iter != typeCache.end()) + return iter->second; + + llvm::SmallVector elements; + mlir::MLIRContext *context = module.getContext(); + + std::uint64_t offset = 0; + for (auto fieldTy : Ty.getTypes()) { + auto [byteSize, byteAlign] = getFieldSizeAndAlign(fieldTy); + mlir::LLVM::DITypeAttr elemTy = + convertType(fieldTy, fileAttr, scope, /*declOp=*/nullptr); + offset = llvm::alignTo(offset, byteAlign); + mlir::LLVM::DIDerivedTypeAttr tyAttr = mlir::LLVM::DIDerivedTypeAttr::get( + context, llvm::dwarf::DW_TAG_member, mlir::StringAttr::get(context, ""), + elemTy, byteSize * 8, byteAlign * 8, offset * 8, + /*optional
=*/std::nullopt, + /*extra data=*/nullptr); + elements.push_back(tyAttr); + offset += llvm::alignTo(byteSize, byteAlign); + } + + auto typeAttr = mlir::LLVM::DICompositeTypeAttr::get( + context, llvm::dwarf::DW_TAG_structure_type, + mlir::StringAttr::get(context, ""), fileAttr, /*line=*/0, scope, + /*baseType=*/nullptr, mlir::LLVM::DIFlags::Zero, offset * 8, + /*alignInBits=*/0, elements, /*dataLocation=*/nullptr, /*rank=*/nullptr, + /*allocated=*/nullptr, /*associated=*/nullptr); + typeCache[Ty] = typeAttr; + return typeAttr; +} + mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { @@ -574,6 +615,8 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, /*hasDescriptor=*/false); } else if (auto recTy = mlir::dyn_cast_or_null(Ty)) { return convertRecordType(recTy, fileAttr, scope, declOp); + } else if (auto tupleTy = mlir::dyn_cast_if_present(Ty)) { + return convertTupleType(tupleTy, fileAttr, scope, declOp); } else if (auto refTy = mlir::dyn_cast_if_present(Ty)) { auto elTy = refTy.getEleTy(); return convertPointerLikeType(elTy, fileAttr, scope, declOp, diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h index eeefb6c463d9366..c1fce4bdae5ce5e 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h @@ -39,6 +39,10 @@ class DebugTypeGenerator { mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp); + mlir::LLVM::DITypeAttr convertTupleType(mlir::TupleType Ty, + mlir::LLVM::DIFileAttr fileAttr, + mlir::LLVM::DIScopeAttr scope, + fir::cg::XDeclareOp declOp); mlir::LLVM::DITypeAttr convertSequenceType(fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, @@ -73,6 +77,8 @@ class DebugTypeGenerator { 
mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp); + std::pair + getFieldSizeAndAlign(mlir::Type fieldTy); mlir::ModuleOp module; mlir::SymbolTable *symbolTable; diff --git a/flang/test/Transforms/debug-tuple-type.fir b/flang/test/Transforms/debug-tuple-type.fir new file mode 100644 index 000000000000000..c9b0d16c06e1ae2 --- /dev/null +++ b/flang/test/Transforms/debug-tuple-type.fir @@ -0,0 +1,15 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func private @fn1(!fir.ref>) + func.func private @_FortranAioOutputDerivedType(!fir.ref>) +} + +// CHECK: #[[F64:.*]] = #llvm.di_basic_type +// CHECK: #[[CU:.*]] = #llvm.di_compile_unit<{{.*}}> +// CHECK: #[[DTY1:.*]] = #llvm.di_derived_type +// CHECK: #[[DTY2:.*]] = #llvm.di_derived_type +// CHECK: #[[COM_TY1:.*]] = #llvm.di_composite_type +// CHECK: #[[COM_TY2:.*]] = #llvm.di_composite_type +// CHECK: #llvm.di_subroutine_type +// CHECK: #llvm.di_subroutine_type From 03948882d3bac33cf71a47df1c7ee0f87aad9fc2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Oct 2024 10:12:57 +0000 Subject: [PATCH 412/425] Fix MSVC "32-bit shift implicitly converted to 64 bits" warning. 
NFC NumBits should be less than 20 so using an unsigned instead of size_t should be OK --- llvm/lib/Support/TrieRawHashMap.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Support/TrieRawHashMap.cpp b/llvm/lib/Support/TrieRawHashMap.cpp index 4741f3d4db0490f..11d79a62d011ddf 100644 --- a/llvm/lib/Support/TrieRawHashMap.cpp +++ b/llvm/lib/Support/TrieRawHashMap.cpp @@ -79,7 +79,7 @@ class TrieSubtrie final static constexpr size_t sizeToAlloc(unsigned NumBits) { assert(NumBits < 20 && "Tries should have fewer than ~1M slots"); - size_t Count = 1u << NumBits; + unsigned Count = 1u << NumBits; return totalSizeToAlloc>(Count); } From f7b5f0c805c899b59bcc37279a0a05dca35d3a25 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Oct 2024 10:46:12 +0000 Subject: [PATCH 413/425] [DAG] Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z))) On ANDNOT capable targets we can always do this profitably, without ANDNOT we only attempt this if we don't introduce an additional NOT Followup to #112547 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +- llvm/test/CodeGen/X86/andnot-patterns.ll | 463 ++++++++++++------ 2 files changed, 314 insertions(+), 160 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b800204d917503f..ceaf5d664131c3a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7355,7 +7355,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y))) // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y))) - SDValue X, Y, NotY; + SDValue X, Y, Z, NotY; for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE}) if (sd_match(N, m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) && @@ -7364,6 +7364,15 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getNode(ISD::AND, DL, VT, X, DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), 
VT)); + // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z))) + for (unsigned Opc : {ISD::ROTL, ISD::ROTR}) + if (sd_match(N, m_And(m_Value(X), + m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) && + sd_match(NotY, m_Not(m_Value(Y))) && + (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse())) + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT)); + // Masking the negated extension of a boolean is just the zero-extended // boolean: // and (sub 0, zext(bool X)), 1 --> zext(bool X) diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll index 1df29f0b12d1b61..fc573fbd4fc99d3 100644 --- a/llvm/test/CodeGen/X86/andnot-patterns.ll +++ b/llvm/test/CodeGen/X86/andnot-patterns.ll @@ -14,41 +14,73 @@ declare void @use_i32(i32) ; define i64 @andnot_rotl_i64(i64 %a0, i64 %a1, i64 %a2) nounwind { -; X86-LABEL: andnot_rotl_i64: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: notl %esi -; X86-NEXT: notl %edx -; X86-NEXT: testb $32, %cl -; X86-NEXT: jne .LBB0_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %edx, %eax -; X86-NEXT: jmp .LBB0_3 -; X86-NEXT: .LBB0_1: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %edx, %esi -; X86-NEXT: .LBB0_3: -; X86-NEXT: movl %esi, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shldl %cl, %esi, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotl_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: jne .LBB0_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %eax, %edx +; 
X86-NOBMI-NEXT: jmp .LBB0_3 +; X86-NOBMI-NEXT: .LBB0_1: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: movl %eax, %esi +; X86-NOBMI-NEXT: .LBB0_3: +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: shldl %cl, %edx, %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI-NEXT: shldl %cl, %esi, %edx +; X86-NOBMI-NEXT: notl %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotl_i64: -; X64: # %bb.0: -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: notq %rax -; X64-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NEXT: rolq %cl, %rax -; X64-NEXT: andq %rdi, %rax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotl_i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: testb $32, %cl +; X86-BMI-NEXT: jne .LBB0_1 +; X86-BMI-NEXT: # %bb.2: +; X86-BMI-NEXT: movl %eax, %esi +; X86-BMI-NEXT: jmp .LBB0_3 +; X86-BMI-NEXT: .LBB0_1: +; X86-BMI-NEXT: movl %edx, %esi +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: .LBB0_3: +; X86-BMI-NEXT: movl %edx, %eax +; X86-BMI-NEXT: shldl %cl, %esi, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI-NEXT: shldl %cl, %edx, %esi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %edx +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotl_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdx, %rcx +; X64-NOBMI-NEXT: movq %rsi, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI-NEXT: rolq %cl, %rax +; X64-NOBMI-NEXT: notq %rax +; X64-NOBMI-NEXT: andq %rdi, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotl_i64: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movq %rdx, 
%rcx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI-NEXT: rolq %cl, %rsi +; X64-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-BMI-NEXT: retq %not = xor i64 %a1, -1 %rot = tail call i64 @llvm.fshl.i64(i64 %not, i64 %not, i64 %a2) %and = and i64 %rot, %a0 @@ -56,24 +88,40 @@ define i64 @andnot_rotl_i64(i64 %a0, i64 %a1, i64 %a2) nounwind { } define i32 @andnot_rotl_i32(i32 %a0, i32 %a1, i32 %a2) nounwind { -; X86-LABEL: andnot_rotl_i32: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax -; X86-NEXT: roll %cl, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotl_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: roll %cl, %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotl_i32: -; X64: # %bb.0: -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: roll %cl, %eax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotl_i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: roll %cl, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotl_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: roll %cl, %eax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotl_i32: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: roll %cl, %esi +; X64-BMI-NEXT: andnl %edi, %esi, %eax 
+; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %rot = tail call i32 @llvm.fshl.i32(i32 %not, i32 %not, i32 %a2) %and = and i32 %rot, %a0 @@ -84,23 +132,32 @@ define i16 @andnot_rotl_i16(i16 %a0, i16 %a1, i16 %a2) nounwind { ; X86-LABEL: andnot_rotl_i16: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolw %cl, %ax +; X86-NEXT: notl %eax ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; -; X64-LABEL: andnot_rotl_i16: -; X64: # %bb.0: -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rolw %cl, %ax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: retq +; X64-NOBMI-LABEL: andnot_rotl_i16: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: rolw %cl, %ax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotl_i16: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: rolw %cl, %si +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI-NEXT: retq %not = xor i16 %a1, -1 %rot = tail call i16 @llvm.fshl.i16(i16 %not, i16 %not, i16 %a2) %and = and i16 %rot, %a0 @@ -112,8 +169,8 @@ define i8 @andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notb %al ; X86-NEXT: rolb %cl, %al +; X86-NEXT: notb %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; @@ -121,9 +178,9 @@ define i8 
@andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %esi, %eax -; X64-NEXT: notb %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rolb %cl, %al +; X64-NEXT: notb %al ; X64-NEXT: andb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -198,41 +255,73 @@ define i64 @andnot_rotl_i64_multiuse_rot(i64 %a0, i64 %a1, i64 %a2) nounwind { ; define i64 @andnot_rotr_i64(i64 %a0, i64 %a1, i64 %a2) nounwind { -; X86-LABEL: andnot_rotr_i64: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: notl %esi -; X86-NEXT: notl %edx -; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB5_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %edx, %eax -; X86-NEXT: jmp .LBB5_3 -; X86-NEXT: .LBB5_1: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %edx, %esi -; X86-NEXT: .LBB5_3: -; X86-NEXT: movl %esi, %edx -; X86-NEXT: shrdl %cl, %eax, %edx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrdl %cl, %esi, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotr_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: je .LBB5_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: jmp .LBB5_3 +; X86-NOBMI-NEXT: .LBB5_1: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: movl %eax, %esi +; X86-NOBMI-NEXT: .LBB5_3: +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI-NEXT: 
shrdl %cl, %esi, %edx +; X86-NOBMI-NEXT: notl %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotr_i64: -; X64: # %bb.0: -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: notq %rax -; X64-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NEXT: rorq %cl, %rax -; X64-NEXT: andq %rdi, %rax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotr_i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: testb $32, %cl +; X86-BMI-NEXT: je .LBB5_1 +; X86-BMI-NEXT: # %bb.2: +; X86-BMI-NEXT: movl %eax, %esi +; X86-BMI-NEXT: jmp .LBB5_3 +; X86-BMI-NEXT: .LBB5_1: +; X86-BMI-NEXT: movl %edx, %esi +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: .LBB5_3: +; X86-BMI-NEXT: movl %edx, %eax +; X86-BMI-NEXT: shrdl %cl, %esi, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI-NEXT: shrdl %cl, %edx, %esi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %edx +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotr_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdx, %rcx +; X64-NOBMI-NEXT: movq %rsi, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI-NEXT: rorq %cl, %rax +; X64-NOBMI-NEXT: notq %rax +; X64-NOBMI-NEXT: andq %rdi, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotr_i64: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movq %rdx, %rcx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI-NEXT: rorq %cl, %rsi +; X64-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-BMI-NEXT: retq %not = xor i64 %a1, -1 %rot = tail call i64 @llvm.fshr.i64(i64 %not, i64 %not, i64 %a2) %and = and i64 %rot, %a0 @@ -240,24 +329,40 @@ define i64 @andnot_rotr_i64(i64 %a0, i64 %a1, i64 %a2) nounwind { } define i32 
@andnot_rotr_i32(i32 %a0, i32 %a1, i32 %a2) nounwind { -; X86-LABEL: andnot_rotr_i32: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax -; X86-NEXT: rorl %cl, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotr_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: rorl %cl, %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotr_i32: -; X64: # %bb.0: -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %eax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotr_i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: rorl %cl, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotr_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: rorl %cl, %eax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotr_i32: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: rorl %cl, %esi +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %rot = tail call i32 @llvm.fshr.i32(i32 %not, i32 %not, i32 %a2) %and = and i32 %rot, %a0 @@ -268,23 +373,32 @@ define i16 @andnot_rotr_i16(i16 %a0, i16 %a1, i16 %a2) nounwind { ; X86-LABEL: andnot_rotr_i16: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: 
notl %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorw %cl, %ax +; X86-NEXT: notl %eax ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; -; X64-LABEL: andnot_rotr_i16: -; X64: # %bb.0: -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorw %cl, %ax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: retq +; X64-NOBMI-LABEL: andnot_rotr_i16: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: rorw %cl, %ax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotr_i16: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: rorw %cl, %si +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI-NEXT: retq %not = xor i16 %a1, -1 %rot = tail call i16 @llvm.fshr.i16(i16 %not, i16 %not, i16 %a2) %and = and i16 %rot, %a0 @@ -296,8 +410,8 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notb %al ; X86-NEXT: rorb %cl, %al +; X86-NEXT: notb %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; @@ -305,9 +419,9 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %esi, %eax -; X64-NEXT: notb %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rorb %cl, %al +; X64-NEXT: notb %al ; X64-NEXT: andb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -318,36 +432,67 @@ 
define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { } define i32 @andnot_rotr_i32_multiuse_not(i32 %a0, i32 %a1, i32 %a2) nounwind { -; X86-LABEL: andnot_rotr_i32_multiuse_not: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: rorl %cl, %esi -; X86-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-NEXT: pushl %eax -; X86-NEXT: calll use_i32@PLT -; X86-NEXT: addl $4, %esp -; X86-NEXT: movl %esi, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotr_i32_multiuse_not: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: movl %eax, %esi +; X86-NOBMI-NEXT: rorl %cl, %esi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: pushl %eax +; X86-NOBMI-NEXT: calll use_i32@PLT +; X86-NOBMI-NEXT: addl $4, %esp +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotr_i32_multiuse_not: -; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: notl %esi -; X64-NEXT: movl %esi, %ebx -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %ebx -; X64-NEXT: andl %edi, %ebx -; X64-NEXT: movl %esi, %edi -; X64-NEXT: callq use_i32@PLT -; X64-NEXT: movl %ebx, %eax -; X64-NEXT: popq %rbx -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotr_i32_multiuse_not: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: notl %edx +; X86-BMI-NEXT: rorl %cl, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %esi +; X86-BMI-NEXT: pushl %edx +; X86-BMI-NEXT: calll use_i32@PLT +; X86-BMI-NEXT: addl $4, %esp +; X86-BMI-NEXT: movl %esi, %eax +; X86-BMI-NEXT: 
popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotr_i32_multiuse_not: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: pushq %rbx +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: notl %esi +; X64-NOBMI-NEXT: movl %esi, %ebx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: rorl %cl, %ebx +; X64-NOBMI-NEXT: andl %edi, %ebx +; X64-NOBMI-NEXT: movl %esi, %edi +; X64-NOBMI-NEXT: callq use_i32@PLT +; X64-NOBMI-NEXT: movl %ebx, %eax +; X64-NOBMI-NEXT: popq %rbx +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotr_i32_multiuse_not: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: pushq %rbx +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: notl %eax +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: rorl %cl, %esi +; X64-BMI-NEXT: andnl %edi, %esi, %ebx +; X64-BMI-NEXT: movl %eax, %edi +; X64-BMI-NEXT: callq use_i32@PLT +; X64-BMI-NEXT: movl %ebx, %eax +; X64-BMI-NEXT: popq %rbx +; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %rot = tail call i32 @llvm.fshr.i32(i32 %not, i32 %not, i32 %a2) %and = and i32 %rot, %a0 From 0fb76bae6b2abfe5e0a34557f365a586be989364 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 30 Oct 2024 11:51:55 +0100 Subject: [PATCH 414/425] Reapply "[libc++] Simplify the implementation of std::sort a bit (#104902)" (#114023) This reverts commit ef44e4659878f2. The patch was originally reverted because it was deemed to introduce a performance regression for small inputs, however it also fixed a previous performance regression for larger inputs. So overall, this patch is desirable. 
--- libcxx/include/__algorithm/comp.h | 3 + libcxx/include/__algorithm/ranges_minmax.h | 2 +- libcxx/include/__algorithm/sort.h | 285 ++++++++---------- libcxx/include/__functional/operations.h | 12 + .../include/__functional/ranges_operations.h | 6 + libcxx/include/__type_traits/desugars_to.h | 6 + .../__type_traits/is_trivially_copyable.h | 4 +- libcxx/src/algorithm.cpp | 3 +- 8 files changed, 150 insertions(+), 171 deletions(-) diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h index 1f38f5d2d99b43c..ab3c598418828af 100644 --- a/libcxx/include/__algorithm/comp.h +++ b/libcxx/include/__algorithm/comp.h @@ -42,6 +42,9 @@ struct __less { } }; +template +inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value; diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h index 4f2b2bf26382da3..5f2e5cb2a1eeab7 100644 --- a/libcxx/include/__algorithm/ranges_minmax.h +++ b/libcxx/include/__algorithm/ranges_minmax.h @@ -89,7 +89,7 @@ struct __minmax { // vectorize the code. 
if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> && __is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value && - __desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) { + __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) { minmax_result<_ValueT> __result = {__r[0], __r[0]}; for (auto __e : __r) { if (__e < __result.min) diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index 0b2137dee2f77e3..39868b8b6a30aef 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -27,11 +27,13 @@ #include <__functional/ranges_operations.h> #include <__iterator/iterator_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_same.h> +#include <__type_traits/is_trivially_copyable.h> #include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -47,110 +49,11 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// stable, 2-3 compares, 0-2 swaps - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned -__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - - unsigned __r = 0; - if (!__c(*__y, *__x)) // if x <= y - { - if (!__c(*__z, *__y)) // if y <= z - return __r; // x <= y && y <= z - // x <= y && y > z - _Ops::iter_swap(__y, __z); // x <= z && y < z - __r = 1; - if (__c(*__y, *__x)) // if x > y - { - _Ops::iter_swap(__x, __y); // x < y && y <= z - __r = 2; - } - return __r; // x <= y && y < z - } - if (__c(*__z, *__y)) // x > y, if y > z - { - _Ops::iter_swap(__x, __z); // x < y && y < z - __r = 1; - return __r; - } - _Ops::iter_swap(__x, __y); // x > y && y <= z - __r = 1; // x < y && x <= z - if (__c(*__z, *__y)) // if y > z - { - 
_Ops::iter_swap(__y, __z); // x <= y && y < z - __r = 2; - } - return __r; -} // x <= y && y <= z - -// stable, 3-6 compares, 0-5 swaps - -template -_LIBCPP_HIDE_FROM_ABI void -__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); - if (__c(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__c(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__c(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } -} - -// stable, 4-10 compares, 0-9 swaps - -template -_LIBCPP_HIDE_FROM_ABI void -__sort5(_ForwardIterator __x1, - _ForwardIterator __x2, - _ForwardIterator __x3, - _ForwardIterator __x4, - _ForwardIterator __x5, - _Comp __comp) { - using _Ops = _IterOps<_AlgPolicy>; - - std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp); - if (__comp(*__x5, *__x4)) { - _Ops::iter_swap(__x4, __x5); - if (__comp(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__comp(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__comp(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } - } -} - -// The comparator being simple is a prerequisite for using the branchless optimization. 
-template -struct __is_simple_comparator : false_type {}; -template <> -struct __is_simple_comparator<__less<>&> : true_type {}; -template -struct __is_simple_comparator&> : true_type {}; -template -struct __is_simple_comparator&> : true_type {}; -#if _LIBCPP_STD_VER >= 20 -template <> -struct __is_simple_comparator : true_type {}; -template <> -struct __is_simple_comparator : true_type {}; -#endif - template ::value_type> -using __use_branchless_sort = - integral_constant::value && sizeof(_Tp) <= sizeof(void*) && - is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>; +inline const bool __use_branchless_sort = + __libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value && + (__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> || + __desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>); namespace __detail { @@ -161,59 +64,88 @@ enum { __block_size = sizeof(uint64_t) * 8 }; // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary. template -inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; bool __r = __c(*__x, *__y); value_type __tmp = __r ? *__x : *__y; *__y = __r ? *__y : *__x; *__x = __tmp; + return !__r; } // Ensures that *__x, *__y and *__z are ordered according to the comparator __c, // under the assumption that *__y and *__z are already ordered. 
template -inline _LIBCPP_HIDE_FROM_ABI void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; - bool __r = __c(*__z, *__x); - value_type __tmp = __r ? *__z : *__x; - *__z = __r ? *__x : *__z; - __r = __c(__tmp, *__y); - *__x = __r ? *__x : *__y; - *__y = __r ? *__y : __tmp; + bool __r1 = __c(*__z, *__x); + value_type __tmp = __r1 ? *__z : *__x; + *__z = __r1 ? *__x : *__z; + bool __r2 = __c(__tmp, *__y); + *__x = __r2 ? *__x : *__y; + *__y = __r2 ? *__y : __tmp; + return !__r1 || !__r2; } +// stable, 2-3 compares, 0-2 swaps + template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__cond_swap<_Compare>(__x2, __x3, __c); - std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { + bool __swapped1 = std::__cond_swap<_Compare>(__x2, __x3, __c); + bool __swapped2 = std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + return __swapped1 || __swapped2; } template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); -} + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x, 
_RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + + if (!__c(*__y, *__x)) // if x <= y + { + if (!__c(*__z, *__y)) // if y <= z + return false; // x <= y && y <= z + // x <= y && y > z + _Ops::iter_swap(__y, __z); // x <= z && y < z + if (__c(*__y, *__x)) // if x > y + _Ops::iter_swap(__x, __y); // x < y && y <= z + return true; // x <= y && y < z + } + if (__c(*__z, *__y)) // x > y, if y > z + { + _Ops::iter_swap(__x, __z); // x < y && y < z + return true; + } + _Ops::iter_swap(__x, __y); // x > y && y <= z + // x < y && x <= z + if (__c(*__z, *__y)) // if y > z + _Ops::iter_swap(__y, __z); // x <= y && y < z + return true; +} // x <= y && y <= z + +// stable, 3-6 compares, 0-5 swaps template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x3, __c); std::__cond_swap<_Compare>(__x2, __x4, __c); std::__cond_swap<_Compare>(__x1, __x2, __c); @@ -224,27 +156,39 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { - std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c); + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + 
std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); + if (__c(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__c(*__x3, *__x2)) { + _Ops::iter_swap(__x2, __x3); + if (__c(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } } +// stable, 4-10 compares, 0-9 swaps + template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x2, __c); std::__cond_swap<_Compare>(__x4, __x5, __c); std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c); @@ -256,16 +200,29 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { - std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>( - std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c); + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + + std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __comp); + if (__comp(*__x5, *__x4)) { + _Ops::iter_swap(__x4, __x5); + if (__comp(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__comp(*__x3, *__x2)) { + 
_Ops::iter_swap(__x2, __x3); + if (__comp(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } + } } // Assumes size > 0 @@ -355,14 +312,14 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator _Ops::iter_swap(__first, __last); return true; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); return true; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort4<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return true; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort5<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), @@ -373,7 +330,7 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator } typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; _RandomAccessIterator __j = __first + difference_type(2); - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); const unsigned __limit = 8; unsigned __count = 0; for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) { @@ -780,14 +737,14 @@ void __introsort(_RandomAccessIterator __first, _Ops::iter_swap(__first, __last); return; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); return; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort4<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return; case 5: - 
std::__sort5_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort5<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), @@ -928,10 +885,8 @@ __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co // Only use bitset partitioning for arithmetic types. We should also check // that the default comparator is in use so that we are sure that there are no // branches in the comparator. - std::__introsort<_AlgPolicy, - _Comp&, - _RandomAccessIterator, - __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(__first, __last, __comp, __depth_limit); + std::__introsort<_AlgPolicy, _Comp&, _RandomAccessIterator, __use_branchless_sort<_Comp, _RandomAccessIterator> >( + __first, __last, __comp, __depth_limit); } template diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h index 6022bd679ed3e3c..67d9da289aead3f 100644 --- a/libcxx/include/__functional/operations.h +++ b/libcxx/include/__functional/operations.h @@ -362,6 +362,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less); +template +inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _Tp> = is_integral<_Tp>::value; @@ -377,6 +380,9 @@ struct _LIBCPP_TEMPLATE_VIS less { typedef void is_transparent; }; +template +inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Up> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, less<>, _Tp, _Tp> = is_integral<_Tp>::value; #endif @@ -446,6 +452,9 @@ struct _LIBCPP_TEMPLATE_VIS greater : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(greater); +template +inline const bool __desugars_to_v<__greater_tag, greater<_Tp>, _Tp, _Tp> = true; + #if _LIBCPP_STD_VER >= 14 template <> struct _LIBCPP_TEMPLATE_VIS greater { @@ -457,6 +466,9 @@ struct 
_LIBCPP_TEMPLATE_VIS greater { } typedef void is_transparent; }; + +template +inline const bool __desugars_to_v<__greater_tag, greater<>, _Tp, _Up> = true; #endif // Logical operations diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index f023d765a6c8ab2..df95843e7c9af61 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -102,6 +102,12 @@ inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = tru template inline const bool __desugars_to_v<__totally_ordered_less_tag, ranges::less, _Tp, _Up> = true; +template +inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true; + +template +inline const bool __desugars_to_v<__greater_tag, ranges::greater, _Tp, _Up> = true; + #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h index b0ce7c414e5d778..452c70bfbad66de 100644 --- a/libcxx/include/__type_traits/desugars_to.h +++ b/libcxx/include/__type_traits/desugars_to.h @@ -25,6 +25,12 @@ struct __equal_tag {}; // syntactically, the operation is equivalent to calling `a + b` struct __plus_tag {}; +// syntactically, the operation is equivalent to calling `a < b` +struct __less_tag {}; + +// syntactically, the operation is equivalent to calling `a > b` +struct __greater_tag {}; + // syntactically, the operation is equivalent to calling `a < b`, and these expressions // have to be true for any `a` and `b`: // - `(a < b) == (b > a)` diff --git a/libcxx/include/__type_traits/is_trivially_copyable.h b/libcxx/include/__type_traits/is_trivially_copyable.h index e92af126ee94d93..8eb3ba7581af159 100644 --- a/libcxx/include/__type_traits/is_trivially_copyable.h +++ b/libcxx/include/__type_traits/is_trivially_copyable.h @@ -27,10 +27,8 @@ template inline constexpr bool is_trivially_copyable_v = 
__is_trivially_copyable(_Tp); #endif -#if _LIBCPP_STD_VER >= 20 template -inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t); -#endif +inline const bool __is_cheap_to_copy = __is_trivially_copyable(_Tp) && sizeof(_Tp) <= sizeof(std::intmax_t); _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/src/algorithm.cpp b/libcxx/src/algorithm.cpp index af9d60a8e271e84..a7c39b5e5183a4d 100644 --- a/libcxx/src/algorithm.cpp +++ b/libcxx/src/algorithm.cpp @@ -21,8 +21,7 @@ void __sort(RandomAccessIterator first, RandomAccessIterator last, Comp comp) { std::__introsort<_ClassicAlgPolicy, ranges::less, RandomAccessIterator, - __use_branchless_sort::value>( - first, last, ranges::less{}, depth_limit); + __use_branchless_sort>(first, last, ranges::less{}, depth_limit); } // clang-format off From f447cf15b2fcf40e519633d4cd211bb4211bfc08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20R=C3=B8nne=20Petersen?= Date: Wed, 30 Oct 2024 11:55:57 +0100 Subject: [PATCH 415/425] [CSKY] Fix some typos in CPU feature descriptions (NFC) (#105774) In Zig, we have a tool that updates our CPU model/feature data from LLVM's. Noticed these typos when running it for LLVM 19. Note: I don't have commit access. 
--- llvm/lib/Target/CSKY/CSKY.td | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td index 9809caa8bd8f65b..f88daeed8d42154 100644 --- a/llvm/lib/Target/CSKY/CSKY.td +++ b/llvm/lib/Target/CSKY/CSKY.td @@ -97,28 +97,28 @@ def iHasFLOAT7E60 : Predicate<"Subtarget->hasFLOAT7E60()">, "Support CSKY float7e60 instructions">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", - "Enable divide instrutions">; + "Enable divide instructions">; def HasHWDiv : Predicate<"Subtarget->hasHardwareDivide()">, AssemblerPredicate<(all_of FeatureHWDiv), - "Enable divide instrutions">; + "Enable divide instructions">; def FeatureSTM : SubtargetFeature<"multiple_stld", "HasSTM", "true", - "Enable multiple load/store instrutions">; + "Enable multiple load/store instructions">; def HasSTM : Predicate<"Subtarget->hasSTM()">, AssemblerPredicate<(all_of FeatureSTM), - "Enable multiple load/store instrutions">; + "Enable multiple load/store instructions">; def FeaturePushPop : SubtargetFeature<"pushpop", "HasPushPop", "true", - "Enable push/pop instrutions">; + "Enable push/pop instructions">; def HasPushPop : Predicate<"Subtarget->hasPushPop()">, AssemblerPredicate<(all_of FeaturePushPop), - "Enable push/pop instrutions">; + "Enable push/pop instructions">; def FeatureDSP - : SubtargetFeature<"edsp", "HasDSP", "true", "Enable DSP instrutions">; + : SubtargetFeature<"edsp", "HasDSP", "true", "Enable DSP instructions">; def HasDSP : Predicate<"Subtarget->hasDSP()">, AssemblerPredicate<(all_of FeatureDSP), - "Enable DSP instrutions">; + "Enable DSP instructions">; def HasDSP1E2 : SubtargetFeature<"dsp1e2", "HasDSP1E2", "true", "Support CSKY dsp1e2 instructions">; @@ -133,16 +133,16 @@ def iHasDSPE60 : Predicate<"Subtarget->hasDSPE60()">, "Support CSKY dspe60 instructions">; def FeatureDSPV2 : SubtargetFeature<"dspv2", "HasDSPV2", "true", - "Enable DSP V2.0 
instrutions">; + "Enable DSP V2.0 instructions">; def HasDSPV2 : Predicate<"Subtarget->hasDSPV2()">, AssemblerPredicate<(all_of FeatureDSPV2), - "Enable DSP V2.0 instrutions">; + "Enable DSP V2.0 instructions">; def FeatureDSP_Silan : SubtargetFeature<"dsp_silan", "HasDSP_Silan", "true", - "Enable DSP Silan instrutions">; + "Enable DSP Silan instructions">; def HasDSP_Silan : Predicate<"Subtarget->hasDSP_Silan()">, AssemblerPredicate<(all_of FeatureDSP_Silan), - "Enable DSP Silan instrutions">; + "Enable DSP Silan instructions">; // Atomic Support def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true", @@ -232,11 +232,11 @@ def FeatureSoftTP : SubtargetFeature<"soft-tp", "ReadTPHard", "false", "Disable TLS Pointer register">; def FeatureIstack : SubtargetFeature<"istack", "EnableInterruptAttribute", - "true", "Enable interrput attribute">; + "true", "Enable interrupt attribute">; def EnableInterruptAttribute : Predicate<"Subtarget->enableInterruptAttribute()">, AssemblerPredicate<(all_of FeatureIstack), - "Enable interrput attribute">; + "Enable interrupt attribute">; def FeatureConstPool : SubtargetFeature<"constpool", "DumpConstPool", "true", "Dump the constant pool by compiler">; From 092a819e94da3fc0cac6982e99861546237fcb04 Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Wed, 30 Oct 2024 10:58:26 +0000 Subject: [PATCH 416/425] [Flang][OpenMP] Add frontend support for directives involving master (#113893) Issue deprecation warning for these directives. Lowering currently supports parallel master, for all other combined or composite directives involving master, issue TODO errors. Note: The first commit changes the formatting and generalizes the deprecation message emission for reuse in the second commit. I can pull it out into a separate commit if required. 
--- .../flang/Semantics/openmp-directive-sets.h | 1 + flang/lib/Parser/openmp-parsers.cpp | 8 ++ flang/lib/Parser/unparse.cpp | 15 ++++ flang/lib/Semantics/resolve-directives.cpp | 53 +++++++++++--- flang/test/Lower/OpenMP/master_taskloop.f90 | 14 ++++ .../Lower/OpenMP/master_taskloop_simd.f90 | 14 ++++ .../OpenMP/parallel-master-taskloop-simd.f90 | 14 ++++ .../Lower/OpenMP/parallel-master-taskloop.f90 | 14 ++++ flang/test/Lower/OpenMP/parallel-master.f90 | 16 ++++ flang/test/Parser/OpenMP/master-unparse.f90 | 73 +++++++++++++++++++ .../Semantics/OpenMP/clause-validity01.f90 | 4 +- flang/test/Semantics/OpenMP/deprecation.f90 | 59 +++++++++++++++ flang/test/Semantics/OpenMP/flush02.f90 | 2 +- .../test/Semantics/OpenMP/nested-barrier.f90 | 4 +- flang/test/Semantics/OpenMP/nested-master.f90 | 24 +++--- flang/test/Semantics/OpenMP/nested-teams.f90 | 2 +- flang/test/Semantics/OpenMP/ordered-simd.f90 | 8 +- 17 files changed, 293 insertions(+), 32 deletions(-) create mode 100644 flang/test/Lower/OpenMP/master_taskloop.f90 create mode 100644 flang/test/Lower/OpenMP/master_taskloop_simd.f90 create mode 100644 flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 create mode 100644 flang/test/Lower/OpenMP/parallel-master-taskloop.f90 create mode 100644 flang/test/Lower/OpenMP/parallel-master.f90 create mode 100644 flang/test/Parser/OpenMP/master-unparse.f90 create mode 100644 flang/test/Semantics/OpenMP/deprecation.f90 diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h index 50d6d5b59ef7dd9..55ef1e0ca61b9f6 100644 --- a/flang/include/flang/Semantics/openmp-directive-sets.h +++ b/flang/include/flang/Semantics/openmp-directive-sets.h @@ -210,6 +210,7 @@ static const OmpDirectiveSet blockConstructSet{ Directive::OMPD_ordered, Directive::OMPD_parallel, Directive::OMPD_parallel_masked, + Directive::OMPD_parallel_master, Directive::OMPD_parallel_workshare, Directive::OMPD_scope, Directive::OMPD_single, 
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 598439cbee87e64..5276e1ec1dcadd7 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -583,12 +583,19 @@ TYPE_PARSER(sourced(construct(first( "MASKED TASKLOOP SIMD" >> pure(llvm::omp::Directive::OMPD_masked_taskloop_simd), "MASKED TASKLOOP" >> pure(llvm::omp::Directive::OMPD_masked_taskloop), + "MASTER TASKLOOP SIMD" >> + pure(llvm::omp::Directive::OMPD_master_taskloop_simd), + "MASTER TASKLOOP" >> pure(llvm::omp::Directive::OMPD_master_taskloop), "PARALLEL DO SIMD" >> pure(llvm::omp::Directive::OMPD_parallel_do_simd), "PARALLEL DO" >> pure(llvm::omp::Directive::OMPD_parallel_do), "PARALLEL MASKED TASKLOOP SIMD" >> pure(llvm::omp::Directive::OMPD_parallel_masked_taskloop_simd), "PARALLEL MASKED TASKLOOP" >> pure(llvm::omp::Directive::OMPD_parallel_masked_taskloop), + "PARALLEL MASTER TASKLOOP SIMD" >> + pure(llvm::omp::Directive::OMPD_parallel_master_taskloop_simd), + "PARALLEL MASTER TASKLOOP" >> + pure(llvm::omp::Directive::OMPD_parallel_master_taskloop), "SIMD" >> pure(llvm::omp::Directive::OMPD_simd), "TARGET LOOP" >> pure(llvm::omp::Directive::OMPD_target_loop), "TARGET PARALLEL DO SIMD" >> @@ -706,6 +713,7 @@ TYPE_PARSER(construct(first( "MASTER" >> pure(llvm::omp::Directive::OMPD_master), "ORDERED" >> pure(llvm::omp::Directive::OMPD_ordered), "PARALLEL MASKED" >> pure(llvm::omp::Directive::OMPD_parallel_masked), + "PARALLEL MASTER" >> pure(llvm::omp::Directive::OMPD_parallel_master), "PARALLEL WORKSHARE" >> pure(llvm::omp::Directive::OMPD_parallel_workshare), "PARALLEL" >> pure(llvm::omp::Directive::OMPD_parallel), "SCOPE" >> pure(llvm::omp::Directive::OMPD_scope), diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 39fcb61609e33b3..e80ab0da1360eb5 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2274,6 +2274,12 @@ class UnparseVisitor { case 
llvm::omp::Directive::OMPD_masked_taskloop: Word("MASKED TASKLOOP"); break; + case llvm::omp::Directive::OMPD_master_taskloop_simd: + Word("MASTER TASKLOOP SIMD"); + break; + case llvm::omp::Directive::OMPD_master_taskloop: + Word("MASTER TASKLOOP"); + break; case llvm::omp::Directive::OMPD_parallel_do: Word("PARALLEL DO "); break; @@ -2286,6 +2292,12 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_parallel_masked_taskloop: Word("PARALLEL MASKED TASKLOOP"); break; + case llvm::omp::Directive::OMPD_parallel_master_taskloop_simd: + Word("PARALLEL MASTER TASKLOOP SIMD"); + break; + case llvm::omp::Directive::OMPD_parallel_master_taskloop: + Word("PARALLEL MASTER TASKLOOP"); + break; case llvm::omp::Directive::OMPD_simd: Word("SIMD "); break; @@ -2390,6 +2402,9 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_parallel_masked: Word("PARALLEL MASKED"); break; + case llvm::omp::Directive::OMPD_parallel_master: + Word("PARALLEL MASTER"); + break; case llvm::omp::Directive::OMPD_parallel_workshare: Word("PARALLEL WORKSHARE "); break; diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 014b7987a658bd3..5e3ad5f3b4773db 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1531,6 +1531,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { case llvm::omp::Directive::OMPD_masked: case llvm::omp::Directive::OMPD_parallel_masked: case llvm::omp::Directive::OMPD_master: + case llvm::omp::Directive::OMPD_parallel_master: case llvm::omp::Directive::OMPD_ordered: case llvm::omp::Directive::OMPD_parallel: case llvm::omp::Directive::OMPD_scope: @@ -1550,7 +1551,8 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { // TODO others break; } - if (beginDir.v == llvm::omp::Directive::OMPD_master) + if (beginDir.v == llvm::omp::Directive::OMPD_master || + beginDir.v == llvm::omp::Directive::OMPD_parallel_master) 
IssueNonConformanceWarning(beginDir.v, beginDir.source); ClearDataSharingAttributeObjects(); ClearPrivateDataSharingAttributeObjects(); @@ -1563,7 +1565,9 @@ void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) { const auto &beginDir{std::get(beginBlockDir.t)}; switch (beginDir.v) { case llvm::omp::Directive::OMPD_masked: + case llvm::omp::Directive::OMPD_master: case llvm::omp::Directive::OMPD_parallel_masked: + case llvm::omp::Directive::OMPD_parallel_master: case llvm::omp::Directive::OMPD_parallel: case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: @@ -1634,10 +1638,14 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { case llvm::omp::Directive::OMPD_loop: case llvm::omp::Directive::OMPD_masked_taskloop_simd: case llvm::omp::Directive::OMPD_masked_taskloop: + case llvm::omp::Directive::OMPD_master_taskloop_simd: + case llvm::omp::Directive::OMPD_master_taskloop: case llvm::omp::Directive::OMPD_parallel_do: case llvm::omp::Directive::OMPD_parallel_do_simd: case llvm::omp::Directive::OMPD_parallel_masked_taskloop_simd: case llvm::omp::Directive::OMPD_parallel_masked_taskloop: + case llvm::omp::Directive::OMPD_parallel_master_taskloop_simd: + case llvm::omp::Directive::OMPD_parallel_master_taskloop: case llvm::omp::Directive::OMPD_simd: case llvm::omp::Directive::OMPD_target_loop: case llvm::omp::Directive::OMPD_target_parallel_do: @@ -1662,7 +1670,11 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { default: break; } - if (beginDir.v == llvm::omp::Directive::OMPD_target_loop) + if (beginDir.v == llvm::omp::OMPD_master_taskloop || + beginDir.v == llvm::omp::OMPD_master_taskloop_simd || + beginDir.v == llvm::omp::OMPD_parallel_master_taskloop || + beginDir.v == llvm::omp::OMPD_parallel_master_taskloop_simd || + beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source); ClearDataSharingAttributeObjects(); 
SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); @@ -2891,18 +2903,39 @@ void OmpAttributeVisitor::AddOmpRequiresToScope(Scope &scope, void OmpAttributeVisitor::IssueNonConformanceWarning( llvm::omp::Directive D, parser::CharBlock source) { - std::string warnStr = ""; - std::string dirName = llvm::omp::getOpenMPDirectiveName(D).str(); + std::string warnStr; + llvm::raw_string_ostream warnStrOS(warnStr); + warnStrOS << "OpenMP directive " + << parser::ToUpperCaseLetters( + llvm::omp::getOpenMPDirectiveName(D).str()) + << " has been deprecated"; + + auto setAlternativeStr = [&warnStrOS](llvm::StringRef alt) { + warnStrOS << ", please use " << alt << " instead."; + }; switch (D) { case llvm::omp::OMPD_master: - warnStr = "OpenMP directive '" + dirName + - "' has been deprecated, please use 'masked' instead."; + setAlternativeStr("MASKED"); + break; + case llvm::omp::OMPD_master_taskloop: + setAlternativeStr("MASKED TASKLOOP"); + break; + case llvm::omp::OMPD_master_taskloop_simd: + setAlternativeStr("MASKED TASKLOOP SIMD"); + break; + case llvm::omp::OMPD_parallel_master: + setAlternativeStr("PARALLEL MASKED"); + break; + case llvm::omp::OMPD_parallel_master_taskloop: + setAlternativeStr("PARALLEL MASKED TASKLOOP"); + break; + case llvm::omp::OMPD_parallel_master_taskloop_simd: + setAlternativeStr("PARALLEL_MASKED TASKLOOP SIMD"); break; case llvm::omp::OMPD_target_loop: - default: - warnStr = "OpenMP directive '" + dirName + "' has been deprecated."; + default:; } - context_.Warn( - common::UsageWarning::OpenMPUsage, source, "%s"_warn_en_US, warnStr); + context_.Warn(common::UsageWarning::OpenMPUsage, source, "%s"_warn_en_US, + warnStrOS.str()); } } // namespace Fortran::semantics diff --git a/flang/test/Lower/OpenMP/master_taskloop.f90 b/flang/test/Lower/OpenMP/master_taskloop.f90 new file mode 100644 index 000000000000000..26f664b2662dcb0 --- /dev/null +++ b/flang/test/Lower/OpenMP/master_taskloop.f90 @@ -0,0 +1,14 @@ +! 
This test checks lowering of OpenMP master taskloop Directive. + +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +subroutine test_master_taskloop + integer :: i, j = 1 + !CHECK: not yet implemented: Taskloop construct + !$omp master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop +end subroutine diff --git a/flang/test/Lower/OpenMP/master_taskloop_simd.f90 b/flang/test/Lower/OpenMP/master_taskloop_simd.f90 new file mode 100644 index 000000000000000..e928afd65244a4d --- /dev/null +++ b/flang/test/Lower/OpenMP/master_taskloop_simd.f90 @@ -0,0 +1,14 @@ +! This test checks lowering of OpenMP master taskloop simd Directive. + +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +subroutine test_master_taskloop_simd() + integer :: i, j = 1 + !CHECK: not yet implemented: Composite TASKLOOP SIMD + !$omp master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop simd +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 b/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 new file mode 100644 index 000000000000000..086ed01d16d364d --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 @@ -0,0 +1,14 @@ +! This test checks lowering of OpenMP parallel master taskloop simd Directive. + +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! 
RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +subroutine test_parallel_master_taskloop_simd + integer :: i, j = 1 + !CHECK: not yet implemented: Composite TASKLOOP SIMD + !$omp parallel master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop simd +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-master-taskloop.f90 b/flang/test/Lower/OpenMP/parallel-master-taskloop.f90 new file mode 100644 index 000000000000000..17ceb9496c8d342 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-master-taskloop.f90 @@ -0,0 +1,14 @@ +! This test checks lowering of OpenMP parallel master taskloop Directive. + +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +subroutine test_parallel_master_taskloop + integer :: i, j = 1 + !CHECK: not yet implemented: Taskloop construct + !$omp parallel master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-master.f90 b/flang/test/Lower/OpenMP/parallel-master.f90 new file mode 100644 index 000000000000000..8f3ee31b328537e --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-master.f90 @@ -0,0 +1,16 @@ +! This test checks lowering of the parallel master combined construct. + +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s +! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s + +! 
CHECK-LABEL: func @_QPparallel_master +subroutine parallel_master(x) + integer :: x + !CHECK: omp.parallel { + !CHECK: omp.master { + !$omp parallel master + x = 1 + !$omp end parallel master + !CHECK: } + !CHECK: } +end subroutine parallel_master diff --git a/flang/test/Parser/OpenMP/master-unparse.f90 b/flang/test/Parser/OpenMP/master-unparse.f90 new file mode 100644 index 000000000000000..30c293a521b5d1e --- /dev/null +++ b/flang/test/Parser/OpenMP/master-unparse.f90 @@ -0,0 +1,73 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s + +! Check for parsing of master directive + + +subroutine test_master() + integer :: c = 1 + !PARSE-TREE: OmpBeginBlockDirective + !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = master + !CHECK: !$omp master + !$omp master + c = c + 1 + !$omp end master +end subroutine + +subroutine test_master_taskloop_simd() + integer :: i, j = 1 + !PARSE-TREE: OmpBeginLoopDirective + !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = master taskloop simd + !CHECK: !$omp master taskloop simd + !$omp master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop simd +end subroutine + +subroutine test_master_taskloop + integer :: i, j = 1 + !PARSE-TREE: OmpBeginLoopDirective + !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = master taskloop + !CHECK: !$omp master taskloop + !$omp master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop +end subroutine + +subroutine test_parallel_master + integer :: c = 2 + !PARSE-TREE: OmpBeginBlockDirective + !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = parallel master + !CHECK: !$omp parallel master + !$omp parallel master + c = c + 2 + !$omp end parallel master +end subroutine + +subroutine test_parallel_master_taskloop_simd + integer :: i, j = 1 + !PARSE-TREE: OmpBeginLoopDirective + 
!PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = parallel master taskloop simd + !CHECK: !$omp parallel master taskloop simd + !$omp parallel master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop simd +end subroutine + +subroutine test_parallel_master_taskloop + integer :: i, j = 1 + !PARSE-TREE: OmpBeginLoopDirective + !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = parallel master taskloop + !CHECK: !$omp parallel master taskloop + !$omp parallel master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop +end subroutine diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index 1a7a57b124e9bda..124f1a02d99fba7 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -476,14 +476,14 @@ ! 2.13.1 master !$omp parallel - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master a=3.14 !$omp end master !$omp end parallel !$omp parallel - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: NUM_THREADS clause is not allowed on the MASTER directive !$omp master num_threads(4) a=3.14 diff --git a/flang/test/Semantics/OpenMP/deprecation.f90 b/flang/test/Semantics/OpenMP/deprecation.f90 new file mode 100644 index 000000000000000..e04f43026bbce27 --- /dev/null +++ b/flang/test/Semantics/OpenMP/deprecation.f90 @@ -0,0 +1,59 @@ +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -Werror + +! Check for deprecation of master directive and its combined/composite variants + +subroutine test_master() + integer :: c = 1 +!WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. 
+ !$omp master + c = c + 1 + !$omp end master +end subroutine + +subroutine test_parallel_master + integer :: c = 2 +!WARNING: OpenMP directive PARALLEL MASTER has been deprecated, please use PARALLEL MASKED instead. + !$omp parallel master + c = c + 2 + !$omp end parallel master +end subroutine + +subroutine test_master_taskloop_simd() + integer :: i, j = 1 +!WARNING: OpenMP directive MASTER TASKLOOP SIMD has been deprecated, please use MASKED TASKLOOP SIMD instead. + !$omp master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop simd +end subroutine + +subroutine test_master_taskloop + integer :: i, j = 1 +!WARNING: OpenMP directive MASTER TASKLOOP has been deprecated, please use MASKED TASKLOOP instead. + !$omp master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop +end subroutine + +subroutine test_parallel_master_taskloop_simd + integer :: i, j = 1 +!WARNING: OpenMP directive PARALLEL MASTER TASKLOOP SIMD has been deprecated, please use PARALLEL_MASKED TASKLOOP SIMD instead. + !$omp parallel master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop simd +end subroutine + +subroutine test_parallel_master_taskloop + integer :: i, j = 1 +!WARNING: OpenMP directive PARALLEL MASTER TASKLOOP has been deprecated, please use PARALLEL MASKED TASKLOOP instead. + !$omp parallel master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop +end subroutine diff --git a/flang/test/Semantics/OpenMP/flush02.f90 b/flang/test/Semantics/OpenMP/flush02.f90 index f06719f302fd7a7..ed0cf6602d574af 100644 --- a/flang/test/Semantics/OpenMP/flush02.f90 +++ b/flang/test/Semantics/OpenMP/flush02.f90 @@ -80,7 +80,7 @@ !$omp parallel num_threads(4) array = (/1, 2, 3, 4, 5, 6, 7, 8, 9, 10/) - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. 
!$omp master !$omp flush (array) !$omp end master diff --git a/flang/test/Semantics/OpenMP/nested-barrier.f90 b/flang/test/Semantics/OpenMP/nested-barrier.f90 index aae283229e330d1..7c635d8e23cc0d1 100644 --- a/flang/test/Semantics/OpenMP/nested-barrier.f90 +++ b/flang/test/Semantics/OpenMP/nested-barrier.f90 @@ -75,7 +75,7 @@ program omp_nest_barrier end do !$omp end critical - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master do i = 1, 10 k = k + 1 @@ -108,7 +108,7 @@ program omp_nest_barrier end do !$omp end ordered - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master do i = 1, 10 !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. diff --git a/flang/test/Semantics/OpenMP/nested-master.f90 b/flang/test/Semantics/OpenMP/nested-master.f90 index 069de67cafae286..b21ca5d14159318 100644 --- a/flang/test/Semantics/OpenMP/nested-master.f90 +++ b/flang/test/Semantics/OpenMP/nested-master.f90 @@ -9,7 +9,7 @@ program omp_nest_master !$omp do do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -17,7 +17,7 @@ program omp_nest_master end do !$omp sections - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. 
!$omp master do i = 1, 10 @@ -27,7 +27,7 @@ program omp_nest_master !$omp end sections !$omp single - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master do i = 1, 10 @@ -41,7 +41,7 @@ program omp_nest_master !$omp task do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -52,7 +52,7 @@ program omp_nest_master !$omp taskloop do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -63,7 +63,7 @@ program omp_nest_master !$omp target parallel do simd do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: The only OpenMP constructs that can be encountered during execution of a 'SIMD' region are the `ATOMIC` construct, the `LOOP` construct, the `SIMD` construct and the `ORDERED` construct with the `SIMD` clause. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master @@ -75,7 +75,7 @@ program omp_nest_master !$omp critical do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. 
+ !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -85,7 +85,7 @@ program omp_nest_master !$omp ordered do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -99,7 +99,7 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -116,7 +116,7 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -133,7 +133,7 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -151,7 +151,7 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. 
!$omp master j = j -1 diff --git a/flang/test/Semantics/OpenMP/nested-teams.f90 b/flang/test/Semantics/OpenMP/nested-teams.f90 index f3b96b0ab439036..06eea12aba55956 100644 --- a/flang/test/Semantics/OpenMP/nested-teams.f90 +++ b/flang/test/Semantics/OpenMP/nested-teams.f90 @@ -42,7 +42,7 @@ program main !$omp end teams end do - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master !ERROR: TEAMS region can only be strictly nested within the implicit parallel region or TARGET region !$omp teams diff --git a/flang/test/Semantics/OpenMP/ordered-simd.f90 b/flang/test/Semantics/OpenMP/ordered-simd.f90 index ed52b7594910028..716dc42c28bb644 100644 --- a/flang/test/Semantics/OpenMP/ordered-simd.f90 +++ b/flang/test/Semantics/OpenMP/ordered-simd.f90 @@ -95,7 +95,7 @@ SUBROUTINE ORDERED_BAD(N) !$OMP CRITICAL C = C - A * B - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$OMP MASTER DO I = 1,N !ERROR: `ORDERED` region may not be closely nested inside of `CRITICAL`, `ORDERED`, explicit `TASK` or `TASKLOOP` region. @@ -108,7 +108,7 @@ SUBROUTINE ORDERED_BAD(N) !$OMP ORDERED C = C - A * B - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$OMP MASTER DO I = 1,N !ERROR: `ORDERED` region may not be closely nested inside of `CRITICAL`, `ORDERED`, explicit `TASK` or `TASKLOOP` region. @@ -121,7 +121,7 @@ SUBROUTINE ORDERED_BAD(N) !$OMP TASK C = C - A * B - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. 
!ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$OMP MASTER DO I = 1,N @@ -136,7 +136,7 @@ SUBROUTINE ORDERED_BAD(N) !$OMP TASKLOOP DO J= 1,N C = C - A * B - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$OMP MASTER DO I = 1,N From 2de1fc82861edbc484b7a1b82a37aa29d4b982de Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Oct 2024 11:10:35 +0000 Subject: [PATCH 417/425] [PhaseOrdering][X86] Add additional test coverage for #49736 I've kept the old PR50392 tag since this is such an old issue.... --- .../Transforms/PhaseOrdering/X86/pr50392.ll | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll new file mode 100644 index 000000000000000..4a024cc4c0309c1 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default" 
-S < %s | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX,AVX2 + +define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) { +; SSE-LABEL: @PR50392( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SSE-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SSE-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]] +; SSE-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3 +; SSE-NEXT: ret <4 x double> [[SHUFFLE]] +; +; AVX1-LABEL: @PR50392( +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX1-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2 +; AVX1-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3 +; AVX1-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]] +; AVX1-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3 +; AVX1-NEXT: ret <4 x double> [[SHUFFLE]] +; +; AVX2-LABEL: @PR50392( +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX2-NEXT: 
[[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> +; AVX2-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]] +; AVX2-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> +; AVX2-NEXT: ret <4 x double> [[SHUFFLE]] +; + %vecext = extractelement <4 x double> %a, i32 0 + %vecext1 = extractelement <4 x double> %a, i32 1 + %add = fadd double %vecext, %vecext1 + %vecinit = insertelement <4 x double> poison, double %add, i32 0 + %vecext2 = extractelement <4 x double> %a, i32 2 + %vecext3 = extractelement <4 x double> %a, i32 3 + %add4 = fadd double %vecext2, %vecext3 + %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1 + %vecext6 = extractelement <4 x double> %b, i32 0 + %vecext7 = extractelement <4 x double> %b, i32 1 + %add8 = fadd double %vecext6, %vecext7 + %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2 + %vecext10 = extractelement <4 x double> %b, i32 2 + %vecext11 = extractelement <4 x double> %b, i32 3 + %add12 = fadd double %vecext10, %vecext11 + %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3 + %shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> + ret <4 x double> %shuffle +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; AVX: {{.*}} From fcfd64304fce91747b8b03ce84919c4415a941d6 Mon Sep 17 00:00:00 2001 From: Enna1 Date: Wed, 30 Oct 2024 19:25:08 +0800 Subject: [PATCH 418/425] [lld][ELF] Fix typo in help text for plugin-opt=opt-remarks-with-hotness (NFC) (#114016) --- lld/ELF/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index c80c4017d3512cf..ebe772042642106 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -720,7 +720,7 @@ def: J<"plugin-opt=opt-remarks-format=">, HelpText<"Alias for --opt-remarks-format">; def: F<"plugin-opt=opt-remarks-with-hotness">, Alias, - HelpText<"Alias for --opt-remarks-with_hotness">; + HelpText<"Alias for --opt-remarks-with-hotness">; def: J<"plugin-opt=opt-remarks-hotness-threshold=">, Alias, HelpText<"Alias for --opt-remarks-hotness-threshold">; From 5dac2db5a8dab1feccc176cfb6cc4080fa5656e4 Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Wed, 30 Oct 2024 11:53:50 +0000 Subject: [PATCH 419/425] [FMV][AArch64] Remove features which can be expressed as a combination of others. (#113580) Removes sve-bf16, sve-ebf16, and sve-i8mm since they are obsolete. One could write target_version("sve+bf16") instead of sve-bf16 for instance. 
Approved in ACLE as https://github.com/ARM-software/acle/pull/353 --- clang/lib/Basic/Targets/AArch64.cpp | 2 -- .../CodeGen/aarch64-cpu-supports-target.c | 4 ++-- clang/test/CodeGen/aarch64-fmv-dependencies.c | 11 ---------- clang/test/CodeGen/attr-target-version.c | 20 +++++++++---------- clang/test/Sema/attr-target-clones-aarch64.c | 4 ++-- clang/test/SemaCXX/attr-target-version.cpp | 4 ++-- .../builtins/cpu_model/AArch64CPUFeatures.inc | 6 +++--- .../builtins/cpu_model/aarch64/fmv/mrs.inc | 6 ------ .../llvm/TargetParser/AArch64CPUFeatures.inc | 6 +++--- llvm/lib/Target/AArch64/AArch64FMV.td | 3 --- 10 files changed, 22 insertions(+), 44 deletions(-) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index a0f94d5d3154807..3d8de0294d4ba33 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -765,8 +765,6 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const { .Case("i8mm", HasMatMul) .Case("bf16", HasBFloat16) .Case("sve", FPU & SveMode) - .Case("sve-bf16", FPU & SveMode && HasBFloat16) - .Case("sve-i8mm", FPU & SveMode && HasMatMul) .Case("sve-b16b16", HasSVEB16B16) .Case("f32mm", FPU & SveMode && HasMatmulFP32) .Case("f64mm", FPU & SveMode && HasMatmulFP64) diff --git a/clang/test/CodeGen/aarch64-cpu-supports-target.c b/clang/test/CodeGen/aarch64-cpu-supports-target.c index 5186cab92a921d7..e3a75e9a1fc7d39 100644 --- a/clang/test/CodeGen/aarch64-cpu-supports-target.c +++ b/clang/test/CodeGen/aarch64-cpu-supports-target.c @@ -9,9 +9,9 @@ int check_all_feature() { return 3; else if (__builtin_cpu_supports("fcma+rcpc+rcpc2+rcpc3+frintts+dgh")) return 4; - else if (__builtin_cpu_supports("i8mm+bf16+ebf16+rpres+sve+sve-bf16")) + else if (__builtin_cpu_supports("i8mm+bf16+ebf16+rpres+sve")) return 5; - else if (__builtin_cpu_supports("sve-ebf16+sve-i8mm+f32mm+f64mm")) + else if (__builtin_cpu_supports("sve+ebf16+i8mm+f32mm+f64mm")) return 6; else if 
(__builtin_cpu_supports("sve2+sve2-aes+sve2-pmull128")) return 7; diff --git a/clang/test/CodeGen/aarch64-fmv-dependencies.c b/clang/test/CodeGen/aarch64-fmv-dependencies.c index 6d230007f91ff95..db6be423b99f788 100644 --- a/clang/test/CodeGen/aarch64-fmv-dependencies.c +++ b/clang/test/CodeGen/aarch64-fmv-dependencies.c @@ -135,15 +135,6 @@ __attribute__((target_version("ssbs"))) int fmv(void) { return 0; } // CHECK: define dso_local i32 @fmv._Msve() #[[sve:[0-9]+]] { __attribute__((target_version("sve"))) int fmv(void) { return 0; } -// CHECK: define dso_local i32 @fmv._Msve-bf16() #[[sve_bf16_ebf16:[0-9]+]] { -__attribute__((target_version("sve-bf16"))) int fmv(void) { return 0; } - -// CHECK: define dso_local i32 @fmv._Msve-ebf16() #[[sve_bf16_ebf16:[0-9]+]] { -__attribute__((target_version("sve-ebf16"))) int fmv(void) { return 0; } - -// CHECK: define dso_local i32 @fmv._Msve-i8mm() #[[sve_i8mm:[0-9]+]] { -__attribute__((target_version("sve-i8mm"))) int fmv(void) { return 0; } - // CHECK: define dso_local i32 @fmv._Msve2() #[[sve2:[0-9]+]] { __attribute__((target_version("sve2"))) int fmv(void) { return 0; } @@ -209,8 +200,6 @@ int caller() { // CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a" // CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a" // CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a" -// CHECK: attributes #[[sve_bf16_ebf16]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a" -// CHECK: attributes #[[sve_i8mm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+i8mm,+neon,+outline-atomics,+sve,+v8a" // CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a" // CHECK: attributes #[[sve2_aes]] = { {{.*}} 
"target-features"="+aes,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-aes,+v8a" // CHECK: attributes #[[sve2_bitperm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-bitperm,+v8a" diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index dc0cc429abffd18..cd09e05b25e4cd8 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -27,11 +27,11 @@ int foo() { inline int __attribute__((target_version("sha2+aes+f64mm"))) fmv_inline(void) { return 1; } inline int __attribute__((target_version("fp16+fcma+rdma+sme+ fp16 "))) fmv_inline(void) { return 2; } inline int __attribute__((target_version("sha3+i8mm+f32mm"))) fmv_inline(void) { return 12; } -inline int __attribute__((target_version("dit+sve-ebf16"))) fmv_inline(void) { return 8; } +inline int __attribute__((target_version("dit+ebf16"))) fmv_inline(void) { return 8; } inline int __attribute__((target_version("dpb+rcpc2 "))) fmv_inline(void) { return 6; } inline int __attribute__((target_version(" dpb2 + jscvt"))) fmv_inline(void) { return 7; } inline int __attribute__((target_version("rcpc+frintts"))) fmv_inline(void) { return 3; } -inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { return 4; } +inline int __attribute__((target_version("sve+bf16"))) fmv_inline(void) { return 4; } inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; } inline int __attribute__((target_version("sve2+sve2-aes+sve2-bitperm"))) fmv_inline(void) { return 9; } inline int __attribute__((target_version("sve2-sm4+memtag"))) fmv_inline(void) { return 10; } @@ -680,7 +680,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMsve-ebf16 +// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMebf16 // 
CHECK-SAME: () #[[ATTR28:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 8 @@ -708,7 +708,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MsveMsve-bf16 +// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mbf16Msve // CHECK-SAME: () #[[ATTR32:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 4 @@ -837,20 +837,20 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3 // CHECK: resolver_else12: // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 4295098368 -// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 4295098368 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 1207959552 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 1207959552 // CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] // CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] // CHECK: resolver_return13: -// CHECK-NEXT: ret ptr @fmv_inline._MditMsve-ebf16 +// CHECK-NEXT: ret ptr @fmv_inline._Mbf16Msve // CHECK: resolver_else14: // CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 3221225472 -// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 3221225472 +// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 268566528 +// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 268566528 // CHECK-NEXT: [[TMP35:%.*]] = and i1 true, [[TMP34]] // CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]] // CHECK: resolver_return15: -// CHECK-NEXT: ret ptr @fmv_inline._MsveMsve-bf16 +// CHECK-NEXT: ret ptr @fmv_inline._MditMebf16 // CHECK: resolver_else16: // CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 // CHECK-NEXT: 
[[TMP37:%.*]] = and i64 [[TMP36]], 20971520 diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c index a723c5965c5bcde..e101fefd2b67c4b 100644 --- a/clang/test/Sema/attr-target-clones-aarch64.c +++ b/clang/test/Sema/attr-target-clones-aarch64.c @@ -7,7 +7,7 @@ void __attribute__((target_clones("default+sha3"))) warn1(void); // expected-error@+2 {{'target_clones' and 'target_version' attributes are not compatible}} // expected-note@+1 {{conflicting attribute is here}} -void __attribute__((target_version("sve-bf16"), target_clones("sme+memtag"))) not_compat(void); +void __attribute__((target_version("sve"), target_clones("sme+memtag"))) not_compat(void); int redecl(void); int __attribute__((target_clones("frintts", "simd+fp", "default"))) redecl(void) { return 1; } @@ -78,4 +78,4 @@ int useage(void) { // expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}} int __attribute__((target_clones("sve2-sha3+ssbs", "sm4"))) mv_after_use(void) { return 1; } // expected-error@+1 {{'main' cannot be a multiversioned function}} -int __attribute__((target_clones("sve-i8mm"))) main() { return 1; } +int __attribute__((target_clones("i8mm"))) main() { return 1; } diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp index 2c85f9735a87b16..c0a645713b21871 100644 --- a/clang/test/SemaCXX/attr-target-version.cpp +++ b/clang/test/SemaCXX/attr-target-version.cpp @@ -49,7 +49,7 @@ double __attribute__((target_version("rcpc"))) diff_type1(void); auto __attribute__((target_version("rcpc2"))) diff_type2(void) -> int { return 1; } //expected-error@+1 {{multiversioned function declaration has a different return type}} -auto __attribute__((target_version("sve-bf16"))) diff_type2(void) -> long { return (long)1; } +auto __attribute__((target_version("bf16"))) diff_type2(void) -> long { return (long)1; } int __attribute__((target_version("fp16fml"))) 
diff_type3(void) noexcept(false) { return 1; } //expected-error@+2 {{exception specification in declaration does not match previous declaration}} @@ -75,7 +75,7 @@ auto __attribute__((target_version("dpb2"))) ret3(void) -> int { return 1; } class Cls { __attribute__((target_version("rng"))) Cls(); // expected-error@-1 {{attribute 'target_version' multiversioned functions do not yet support constructors}} - __attribute__((target_version("sve-i8mm"))) ~Cls(); + __attribute__((target_version("i8mm"))) ~Cls(); // expected-error@-1 {{attribute 'target_version' multiversioned functions do not yet support destructors}} Cls &__attribute__((target_version("f32mm"))) operator=(const Cls &) = default; diff --git a/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc b/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc index 902fa8f79ab8164..e454524c9cb6a23 100644 --- a/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc +++ b/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc @@ -53,9 +53,9 @@ enum CPUFeatures { FEAT_EBF16, FEAT_RPRES, FEAT_SVE, - FEAT_SVE_BF16, - FEAT_SVE_EBF16, - FEAT_SVE_I8MM, + RESERVED_FEAT_SVE_BF16, // previously used and now ABI legacy + RESERVED_FEAT_SVE_EBF16, // previously used and now ABI legacy + RESERVED_FEAT_SVE_I8MM, // previously used and now ABI legacy FEAT_SVE_F32MM, FEAT_SVE_F64MM, FEAT_SVE2, diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc index 0c76a4fe9b9f2f2..4e25feb2e90c635 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc @@ -65,14 +65,10 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_I8MM); if (hwcap2 & HWCAP2_EBF16) setCPUFeature(FEAT_EBF16); - if (hwcap2 & HWCAP2_SVE_EBF16) - setCPUFeature(FEAT_SVE_EBF16); if (hwcap2 & HWCAP2_DGH) setCPUFeature(FEAT_DGH); if (hwcap2 & HWCAP2_FRINT) setCPUFeature(FEAT_FRINTTS); - if 
(hwcap2 & HWCAP2_SVEI8MM) - setCPUFeature(FEAT_SVE_I8MM); if (hwcap2 & HWCAP2_SVEF32MM) setCPUFeature(FEAT_SVE_F32MM); if (hwcap2 & HWCAP2_SVEF64MM) @@ -119,8 +115,6 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_RCPC3); if (hwcap2 & HWCAP2_BF16) setCPUFeature(FEAT_BF16); - if (hwcap2 & HWCAP2_SVEBF16) - setCPUFeature(FEAT_SVE_BF16); if (hwcap & HWCAP_SVE) setCPUFeature(FEAT_SVE); if (hwcap2 & HWCAP2_SVE2) diff --git a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc index 902fa8f79ab8164..e454524c9cb6a23 100644 --- a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc +++ b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc @@ -53,9 +53,9 @@ enum CPUFeatures { FEAT_EBF16, FEAT_RPRES, FEAT_SVE, - FEAT_SVE_BF16, - FEAT_SVE_EBF16, - FEAT_SVE_I8MM, + RESERVED_FEAT_SVE_BF16, // previously used and now ABI legacy + RESERVED_FEAT_SVE_EBF16, // previously used and now ABI legacy + RESERVED_FEAT_SVE_I8MM, // previously used and now ABI legacy FEAT_SVE_F32MM, FEAT_SVE_F64MM, FEAT_SVE2, diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td index 7146b041fe5d150..12d841445b80f75 100644 --- a/llvm/lib/Target/AArch64/AArch64FMV.td +++ b/llvm/lib/Target/AArch64/AArch64FMV.td @@ -81,9 +81,6 @@ def : FMVExtension<"sme-i16i64", "FEAT_SME_I64", "+sme,+sme-i16i64,+bf16", 570>; def : FMVExtension<"sme2", "FEAT_SME2", "+sme2,+sme,+bf16", 580>; def : FMVExtension<"ssbs", "FEAT_SSBS2", "+ssbs", 490>; def : FMVExtension<"sve", "FEAT_SVE", "+sve,+fullfp16,+fp-armv8,+neon", 310>; -def : FMVExtension<"sve-bf16", "FEAT_SVE_BF16", "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 320>; -def : FMVExtension<"sve-ebf16", "FEAT_SVE_EBF16", "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 330>; -def : FMVExtension<"sve-i8mm", "FEAT_SVE_I8MM", "+sve,+i8mm,+fullfp16,+fp-armv8,+neon", 340>; def : FMVExtension<"sve2", "FEAT_SVE2", "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370>; def 
: FMVExtension<"sve2-aes", "FEAT_SVE_PMULL128", "+sve2,+sve,+aes,+sve2-aes,+fullfp16,+fp-armv8,+neon", 380>; def : FMVExtension<"sve2-bitperm", "FEAT_SVE_BITPERM", "+sve2,+sve,+sve2-bitperm,+fullfp16,+fp-armv8,+neon", 400>; From bc999ee57af61a75511f73b9544051984490344d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 30 Oct 2024 11:54:48 +0000 Subject: [PATCH 420/425] [PhaseOrdering][X86] Add test coverage for #94546 --- .../Transforms/PhaseOrdering/X86/pr94546.ll | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll new file mode 100644 index 000000000000000..1d4cee45b668565 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX,AVX2 + +define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) { +; SSE-LABEL: @PR94546( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x 
double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: ret <4 x double> [[TMP4]] +; +; AVX-LABEL: @PR94546( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX-NEXT: ret <4 x double> [[TMP4]] +; + %vecext = extractelement <4 x double> %a, i32 0 + %vecext1 = extractelement <4 x double> %a, i32 1 + %add = fadd double %vecext, %vecext1 + %vecinit = insertelement <4 x double> poison, double %add, i32 0 + %vecext2 = extractelement <4 x double> %a, i32 2 + %vecext3 = extractelement <4 x double> %a, i32 3 + %add4 = fadd double %vecext2, %vecext3 + %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1 + %vecext6 = extractelement <4 x double> %b, i32 0 + %vecext7 = extractelement <4 x double> %b, i32 1 + %add8 = fadd double %vecext6, %vecext7 + %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2 + %vecext10 = extractelement <4 x double> %b, i32 2 + %vecext11 = extractelement <4 x double> %b, i32 3 + %add12 = fadd double %vecext10, %vecext11 + %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3 + %shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> + ret <4 x double> %shuffle +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX1: {{.*}} +; AVX2: {{.*}} From cea9dd833cf800aeb005286b2667483cc5a8d688 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 30 Oct 2024 11:58:59 +0000 Subject: [PATCH 421/425] [CodeGen] Change MachineInstr::isConstantValuePHI to return Register. NFC. 
(#112901) --- llvm/include/llvm/CodeGen/MachineInstr.h | 4 ++-- llvm/lib/CodeGen/MachineInstr.cpp | 9 +++------ llvm/lib/CodeGen/MachineSSAUpdater.cpp | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 76a7b8662bae66c..360517324746341 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -1764,8 +1764,8 @@ class MachineInstr bool isDereferenceableInvariantLoad() const; /// If the specified instruction is a PHI that always merges together the - /// same virtual register, return the register, otherwise return 0. - unsigned isConstantValuePHI() const; + /// same virtual register, return the register, otherwise return Register(). + Register isConstantValuePHI() const; /// Return true if this instruction has side effects that are not modeled /// by mayLoad / mayStore, etc. diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 0d78c2cafbaf63c..c1bd0bb5b7162e1 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -1535,19 +1535,16 @@ bool MachineInstr::isDereferenceableInvariantLoad() const { return true; } -/// isConstantValuePHI - If the specified instruction is a PHI that always -/// merges together the same virtual register, return the register, otherwise -/// return 0. 
-unsigned MachineInstr::isConstantValuePHI() const { +Register MachineInstr::isConstantValuePHI() const { if (!isPHI()) - return 0; + return {}; assert(getNumOperands() >= 3 && "It's illegal to have a PHI without source operands"); Register Reg = getOperand(1).getReg(); for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) if (getOperand(i).getReg() != Reg) - return 0; + return {}; return Reg; } diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp index c7a673b12d8c509..f0a136751bbffaa 100644 --- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -201,7 +201,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB, // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. - if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + if (Register ConstVal = InsertedPHI->isConstantValuePHI()) { InsertedPHI->eraseFromParent(); return ConstVal; } From 55e4e3ff653356a9079906e209099684723caa4c Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Wed, 30 Oct 2024 12:07:47 +0000 Subject: [PATCH 422/425] [Flang][OpenMP] Access full list of entry block syms and vars (NFC) (#113681) This patch adds methods to `EntryBlockArgs` to access the full list of entry block argument-related symbols and variables, in their standard order. This helps centralizing this logic in as few places as possible to avoid future inconsistencies. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 01a40d6e2204ef2..876feca9b6f5be2 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -76,6 +76,18 @@ struct EntryBlockArgs { reduction.isValid() && taskReduction.isValid() && useDeviceAddr.isValid() && useDevicePtr.isValid(); } + + auto getSyms() const { + return llvm::concat( + inReduction.syms, map.syms, priv.syms, reduction.syms, + taskReduction.syms, useDeviceAddr.syms, useDevicePtr.syms); + } + + auto getVars() const { + return llvm::concat( + inReduction.vars, map.vars, priv.vars, reduction.vars, + taskReduction.vars, useDeviceAddr.vars, useDevicePtr.vars); + } }; } // namespace @@ -1506,8 +1518,7 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable, genEntryBlock(converter, args, op->getRegion(0)); bindEntryBlockArgs( converter, llvm::cast(op), args); - return llvm::to_vector(llvm::concat( - args.priv.syms, args.reduction.syms)); + return llvm::to_vector(args.getSyms()); }; assert((!enableDelayedPrivatization || dsp) && @@ -1581,11 +1592,11 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Operation *terminator = lower::genOpenMPTerminator(builder, sectionsOp, loc); - auto reductionCallback = [&](mlir::Operation *op) { + auto genRegionEntryCB = [&](mlir::Operation *op) { genEntryBlock(converter, args, op->getRegion(0)); bindEntryBlockArgs( converter, llvm::cast(op), args); - return reductionSyms; + return llvm::to_vector(args.getSyms()); }; // Generate nested SECTION constructs. 
@@ -1611,7 +1622,7 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, llvm::omp::Directive::OMPD_section) .setClauses(§ionQueue.begin()->clauses) - .setGenRegionEntryCb(reductionCallback), + .setGenRegionEntryCb(genRegionEntryCB), sectionQueue, sectionQueue.begin()); } From 85f3d5ca4994ff70a72f6ad81948bf4721e15ef1 Mon Sep 17 00:00:00 2001 From: SpencerAbson Date: Wed, 30 Oct 2024 12:11:34 +0000 Subject: [PATCH 423/425] [AArch64] Add assembly/disassembly for SVE COMPACT (b/h) and EXPAND (#114053) This patch adds assembly/disassembly support for the following SVE2.2 instructions - COMPACT (byte, halfword) - EXPAND - Allow selection of `COMPACT` (word/halfword) in streaming mode if the target has FEAT_SME2p2 (see [COMPACT ]( https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/COMPACT--Copy-active-vector-elements-to-lower-numbered-elements-)) - Rename predicates guarding instructions that are illegal in streaming SVE mode without FEAT_SME2p2 - In accordance with https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions Co-authored-by: Marian Lukac marian.lukac@arm.com --- llvm/lib/Target/AArch64/AArch64.td | 3 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 7 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 17 ++- llvm/lib/Target/AArch64/SVEInstrFormats.td | 43 ++++++- .../test/MC/AArch64/SVE/compact-diagnostics.s | 4 +- llvm/test/MC/AArch64/SVE/compact.s | 4 +- .../MC/AArch64/SVE2p2/compact-diagnostics.s | 65 ++++++++++ llvm/test/MC/AArch64/SVE2p2/compact.s | 33 +++++ .../MC/AArch64/SVE2p2/expand-diagnostics.s | 120 ++++++++++++++++++ llvm/test/MC/AArch64/SVE2p2/expand.s | 39 ++++++ 10 files changed, 320 insertions(+), 15 deletions(-) create mode 100644 llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/compact.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s create mode 
100644 llvm/test/MC/AArch64/SVE2p2/expand.s diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 9bb508b783c36a0..6854cccaafa1d78 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -73,7 +73,8 @@ def SVEUnsupported : AArch64Unsupported { SVE2Unsupported.F); } -let F = [HasSME2p2, HasSVE2p2orSME2p2] in +let F = [HasSME2p2, HasSVE2p2orSME2p2, HasNonStreamingSVEorSME2p2, + HasNonStreamingSVE2p2orSME2p2] in def SME2p2Unsupported : AArch64Unsupported; def SME2p1Unsupported : AArch64Unsupported { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6194de2d56b6304..457e918728ae27a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -244,7 +244,7 @@ def HasSVEorSME : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">, AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME), "sve or sme">; -def HasSVEorSME2p2 +def HasNonStreamingSVEorSME2p2 : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||" "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">, AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2), @@ -281,6 +281,11 @@ def HasSMEF16F16orSMEF8F16 : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">, AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16), "sme-f16f16 or sme-f8f16">; +def HasNonStreamingSVE2p2orSME2p2 + : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p2()) ||" + "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">, + AssemblerPredicateWithAll<(any_of FeatureSVE2p2, FeatureSME2p2), + "sme2p2 or sve2p2">; // A subset of NEON instructions are legal in Streaming SVE execution mode, // so don't need the additional check for 'isNeonAvailable'. 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 5c5ae898a8ac022..c9ee2d0059a9fee 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -928,9 +928,10 @@ let Predicates = [HasSVEorSME] in { defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; } // End HasSVEorSME -let Predicates = [HasSVE] in { - defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>; -} // End HasSVE +// COMPACT - word and doubleword +let Predicates = [HasNonStreamingSVEorSME2p2] in { + defm COMPACT_ZPZ : sve_int_perm_compact_sd<"compact", int_aarch64_sve_compact>; +} let Predicates = [HasSVEorSME] in { defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; @@ -4305,6 +4306,16 @@ let Predicates = [HasSVE2p2orSME2p2] in { } // End HasSME2p2orSVE2p2 +//===----------------------------------------------------------------------===// +// SME2.2 or SVE2.2 instructions - Legal in streaming mode iff target has SME2p2 +//===----------------------------------------------------------------------===// +let Predicates = [HasNonStreamingSVE2p2orSME2p2] in { + // SVE2 EXPAND + defm EXPAND_ZPZ : sve2_int_perm_expand<"expand">; + // SVE COMPACT - byte and halfword + defm COMPACT_ZPZ : sve_int_perm_compact_bh<"compact">; +} + //===----------------------------------------------------------------------===// // SVE2 FP8 instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 88a0983aa1480d2..3637a63684a0dee 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -7315,6 +7315,32 @@ multiclass sve2_int_perm_splice_cons { def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>; } +class sve2_int_perm_expand sz, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), 
(ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz; + let Inst{21-13} = 0b110001100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let hasSideEffects = 0; +} + +multiclass sve2_int_perm_expand { + def _B : sve2_int_perm_expand<0b00, asm, ZPR8>; + def _H : sve2_int_perm_expand<0b01, asm, ZPR16>; + def _S : sve2_int_perm_expand<0b10, asm, ZPR32>; + def _D : sve2_int_perm_expand<0b11, asm, ZPR64>; +} + class sve_int_perm_rev sz8_64, bits<2> opc, string asm, ZPRRegOp zprty> : I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn), @@ -7476,7 +7502,7 @@ multiclass sve_int_perm_cpy_v { (!cast(NAME # _H) $passthru, $pg, $splat)>; } -class sve_int_perm_compact +class sve_int_perm_compact sz, string asm, ZPRRegOp zprty> : I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn), asm, "\t$Zd, $Pg, $Zn", "", @@ -7484,8 +7510,8 @@ class sve_int_perm_compact bits<3> Pg; bits<5> Zd; bits<5> Zn; - let Inst{31-23} = 0b000001011; - let Inst{22} = sz; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz; let Inst{21-13} = 0b100001100; let Inst{12-10} = Pg; let Inst{9-5} = Zn; @@ -7494,9 +7520,9 @@ class sve_int_perm_compact let hasSideEffects = 0; } -multiclass sve_int_perm_compact { - def _S : sve_int_perm_compact<0b0, asm, ZPR32>; - def _D : sve_int_perm_compact<0b1, asm, ZPR64>; +multiclass sve_int_perm_compact_sd { + def _S : sve_int_perm_compact<0b10, asm, ZPR32>; + def _D : sve_int_perm_compact<0b11, asm, ZPR64>; def : SVE_2_Op_Pat(NAME # _S)>; def : SVE_2_Op_Pat(NAME # _S)>; @@ -7504,6 +7530,11 @@ multiclass sve_int_perm_compact { def : SVE_2_Op_Pat(NAME # _D)>; } +multiclass sve_int_perm_compact_bh { + def _B : sve_int_perm_compact<0b00, asm, ZPR8>; + def _H : sve_int_perm_compact<0b01, asm, ZPR16>; +} + //===----------------------------------------------------------------------===// // SVE Memory - Contiguous Load Group 
//===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AArch64/SVE/compact-diagnostics.s b/llvm/test/MC/AArch64/SVE/compact-diagnostics.s index a3d86267d917b53..b8ff8cc46201f28 100644 --- a/llvm/test/MC/AArch64/SVE/compact-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/compact-diagnostics.s @@ -28,12 +28,12 @@ compact z31.s, p7, z31.d // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: compact z31.b, p7, z31.b -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: compact z31.b, p7, z31.b // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: compact z31.h, p7, z31.h -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: compact z31.h, p7, z31.h // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/compact.s b/llvm/test/MC/AArch64/SVE/compact.s index ff815980781d79e..a9b47dea246bee9 100644 --- a/llvm/test/MC/AArch64/SVE/compact.s +++ b/llvm/test/MC/AArch64/SVE/compact.s @@ -12,11 +12,11 @@ compact z31.s, p7, z31.s // CHECK-INST: compact z31.s, p7, z31.s // CHECK-ENCODING: [0xff,0x9f,0xa1,0x05] -// CHECK-ERROR: instruction requires: sve +// CHECK-ERROR: instruction requires: sve or sme2p2 // CHECK-UNKNOWN: 05a19fff compact z31.d, p7, z31.d // CHECK-INST: compact z31.d, p7, z31.d // CHECK-ENCODING: [0xff,0x9f,0xe1,0x05] -// CHECK-ERROR: instruction requires: sve +// CHECK-ERROR: instruction requires: sve or sme2p2 // CHECK-UNKNOWN: 05e19fff diff --git a/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s new file mode 100644 index 000000000000000..acf00e7f7a600fb --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// 
--------------------------------------------------------------------------// +// Invalid element widths + +compact z31.h, p7, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: compact z31.h, p7, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z31.b, p7, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: compact z31.b, p7, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate operation + +compact z23.b, p7/m, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: compact z23.b, p7/m, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z23.b, p7.b, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: compact z23.b, p7.b, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z23.h, p7/z, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: compact z23.h, p7/z, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z23.h, p7.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: compact z23.h, p7.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Predicate not in restricted predicate range + +compact z23.b, p8, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: compact z23.b, p8, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z23.h, p8, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: compact z23.h, p8, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// 
--------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z31.b, p7/z, z6.b +compact z31.b, p7, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: compact z31.b, p7, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z31, z6 +compact z31.h, p7, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: compact z31.h, p7, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/compact.s b/llvm/test/MC/AArch64/SVE2p2/compact.s new file mode 100644 index 000000000000000..0170b3832bea674 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/compact.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +compact z0.b, p0, z0.b // 00000101-00100001-10000000-00000000 +// CHECK-INST: compact z0.b, p0, z0.b +// CHECK-ENCODING: [0x00,0x80,0x21,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05218000 + +compact z21.b, p5, z10.b // 00000101-00100001-10010101-01010101 +// CHECK-INST: compact z21.b, p5, z10.b +// CHECK-ENCODING: [0x55,0x95,0x21,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05219555 + +compact z31.h, p7, z31.h // 00000101-01100001-10011111-11111111 +// CHECK-INST: compact z31.h, p7, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x61,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05619fff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s new file mode 100644 index 000000000000000..b9a95f399a168a7 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s @@ -0,0 +1,120 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid element widths. 
+ +expand z23.b, p3, z13.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.b, p3, z13.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p3, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.h, p3, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.s, p3, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.s, p3, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.d, p3, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.d, p3, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.q, p3, z13.q +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.q, p3, z13.q +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate operation + +expand z23.b, p3/z, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: expand z23.b, p3/z, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.b, p3.b, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.b, p3.b, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p3/m, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: expand z23.h, p3/m, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p3.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.h, p3.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.s, p3/z, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: expand z23.s, p3/z, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.s, 
p3.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.s, p3.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.d, p3/m, z13.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: expand z23.d, p3/m, z13.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.d, p3.d, z13.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.d, p3.d, z13.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Predicate not in restricted predicate range + +expand z23.b, p8, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.b, p8, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.b, p3.b, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.b, p3.b, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p8, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.h, p8, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p3.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.h, p3.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}} + +expand z23.s, p8, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.s, p8, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.d, p8, z13.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted 
predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.d, p8, z13.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z31, z6 +expand z31.b, p7, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: expand z31.b, p7, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z31.b, p0/z, z6.b +expand z31.b, p0, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: expand z31.b, p0, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/expand.s b/llvm/test/MC/AArch64/SVE2p2/expand.s new file mode 100644 index 000000000000000..7523978380fbd7e --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/expand.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +expand z0.b, p0, z0.b // 00000101-00110001-10000000-00000000 +// CHECK-INST: expand z0.b, p0, z0.b +// CHECK-ENCODING: [0x00,0x80,0x31,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05318000 + +expand z21.h, p5, z10.h // 00000101-01110001-10010101-01010101 +// CHECK-INST: expand z21.h, p5, z10.h +// CHECK-ENCODING: [0x55,0x95,0x71,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05719555 + +expand z23.s, p3, z13.s // 00000101-10110001-10001101-10110111 +// CHECK-INST: expand z23.s, p3, z13.s +// CHECK-ENCODING: [0xb7,0x8d,0xb1,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05b18db7 + +expand z31.d, p7, z31.d // 00000101-11110001-10011111-11111111 +// CHECK-INST: expand z31.d, p7, z31.d +// CHECK-ENCODING: [0xff,0x9f,0xf1,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05f19fff \ No newline at end of file From 15f63ec19cde170f4cdddc5513a0f0be9515569a Mon Sep 17 00:00:00 2001 From: Lukacma Date: Wed, 30 Oct 2024 12:32:32 +0000 Subject: [PATCH 424/425] [AARCH64] Add assembly/disassembly for FIRSTP, LASTP instr.
(#114049) This patch adds assembly/disassembly and tests for new FIRSTP and LASTP instructions introduced in https://developer.arm.com/documentation/ddi0602/2024-09 --------- Co-authored-by: SpencerAbson --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 5 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 14 ++- .../MC/AArch64/SVE2p2/firstp-diagnostics.s | 32 +++++++ llvm/test/MC/AArch64/SVE2p2/firstp.s | 87 +++++++++++++++++++ .../MC/AArch64/SVE2p2/lastp-diagnostics.s | 32 +++++++ llvm/test/MC/AArch64/SVE2p2/lastp.s | 87 +++++++++++++++++++ 6 files changed, 252 insertions(+), 5 deletions(-) create mode 100644 llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/firstp.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s create mode 100644 llvm/test/MC/AArch64/SVE2p2/lastp.s diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index c9ee2d0059a9fee..2564ddc5f2e5ca8 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2129,7 +2129,7 @@ let Predicates = [HasSVEorSME] in { defm CNTH_XPiI : sve_int_count<0b010, "cnth", int_aarch64_sve_cnth>; defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>; defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>; - defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>; + defm CNTP_XPP : sve_int_pcount_pred<0b000, "cntp", int_aarch64_sve_cntp>; def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1)), (CNTP_XPP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op1), @@ -4304,6 +4304,9 @@ let Predicates = [HasSVE2p2orSME2p2] in { def SXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1000, "sxtw", ZPR64>; def UXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1010, "uxtw", ZPR64>; + // SVE predicate count + defm FIRSTP_XPP : sve_int_pcount_pred_tmp<0b001, "firstp">; + defm LASTP_XPP : sve_int_pcount_pred_tmp<0b010, "lastp">; } // End HasSME2p2orSVE2p2 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 3637a63684a0dee..9fa184c545705b8 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1046,7 +1046,7 @@ multiclass sve_int_count_v opc, string asm, (!cast(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>; } -class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, +class sve_int_pcount_pred sz8_64, bits<3> opc, string asm, PPRRegOp pprty> : I<(outs GPR64:$Rd), (ins PPRAny:$Pg, pprty:$Pn), asm, "\t$Rd, $Pg, $Pn", @@ -1058,17 +1058,17 @@ class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, let Inst{31-24} = 0b00100101; let Inst{23-22} = sz8_64; let Inst{21-19} = 0b100; - let Inst{18-16} = opc{3-1}; + let Inst{18-16} = opc{2-0}; let Inst{15-14} = 0b10; let Inst{13-10} = Pg; - let Inst{9} = opc{0}; + let Inst{9} = 0b0; let Inst{8-5} = Pn; let Inst{4-0} = Rd; let hasSideEffects = 0; } -multiclass sve_int_pcount_pred opc, string asm, +multiclass sve_int_pcount_pred opc, string asm, SDPatternOperator int_op> { def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; @@ -1081,6 +1081,12 @@ multiclass sve_int_pcount_pred opc, string asm, def : SVE_2_Op_Pat(NAME # _D)>; } +multiclass sve_int_pcount_pred_tmp opc, string asm> { + def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; + def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; + def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>; + def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>; +} //===----------------------------------------------------------------------===// // SVE Element Count Group //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s new file mode 100644 index 
000000000000000..4309fd49ecf79f1 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid predicate operand + +firstp x0, p15, p0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: firstp x0, p15, p0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +firstp x0, p15.b, p0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: firstp x0, p15.b, p0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +firstp x0, p15.q, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: firstp x0, p15.q, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid register types + +firstp sp, p15, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: firstp sp, p15, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +firstp w0, p15, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: firstp w0, p15, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/firstp.s b/llvm/test/MC/AArch64/SVE2p2/firstp.s new file mode 100644 index 000000000000000..629bee5576fc7d7 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/firstp.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump 
-d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +firstp x0, p0, p0.b // 00100101-00100001-10000000-00000000 +// CHECK-INST: firstp x0, p0, p0.b +// CHECK-ENCODING: [0x00,0x80,0x21,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25218000 + +firstp x23, p11, p13.b // 00100101-00100001-10101101-10110111 +// CHECK-INST: firstp x23, p11, p13.b +// CHECK-ENCODING: [0xb7,0xad,0x21,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2521adb7 + +firstp xzr, p15, p15.b // 00100101-00100001-10111101-11111111 +// CHECK-INST: firstp xzr, p15, p15.b +// CHECK-ENCODING: [0xff,0xbd,0x21,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2521bdff + +firstp x0, p0, p0.h // 00100101-01100001-10000000-00000000 +// CHECK-INST: firstp x0, p0, p0.h +// CHECK-ENCODING: [0x00,0x80,0x61,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25618000 + +firstp x23, p11, p13.h // 00100101-01100001-10101101-10110111 +// CHECK-INST: firstp x23, p11, p13.h +// CHECK-ENCODING: [0xb7,0xad,0x61,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2561adb7 + +firstp xzr, p15, p15.h // 00100101-01100001-10111101-11111111 +// CHECK-INST: firstp xzr, p15, p15.h +// CHECK-ENCODING: [0xff,0xbd,0x61,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2561bdff + +firstp x0, p0, p0.s // 00100101-10100001-10000000-00000000 +// 
CHECK-INST: firstp x0, p0, p0.s +// CHECK-ENCODING: [0x00,0x80,0xa1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a18000 + +firstp x23, p11, p13.s // 00100101-10100001-10101101-10110111 +// CHECK-INST: firstp x23, p11, p13.s +// CHECK-ENCODING: [0xb7,0xad,0xa1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a1adb7 + +firstp xzr, p15, p15.s // 00100101-10100001-10111101-11111111 +// CHECK-INST: firstp xzr, p15, p15.s +// CHECK-ENCODING: [0xff,0xbd,0xa1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a1bdff + +firstp x0, p0, p0.d // 00100101-11100001-10000000-00000000 +// CHECK-INST: firstp x0, p0, p0.d +// CHECK-ENCODING: [0x00,0x80,0xe1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e18000 + +firstp x23, p11, p13.d // 00100101-11100001-10101101-10110111 +// CHECK-INST: firstp x23, p11, p13.d +// CHECK-ENCODING: [0xb7,0xad,0xe1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e1adb7 + +firstp xzr, p15, p15.d // 00100101-11100001-10111101-11111111 +// CHECK-INST: firstp xzr, p15, p15.d +// CHECK-ENCODING: [0xff,0xbd,0xe1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e1bdff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s new file mode 100644 index 000000000000000..e277bdbc6aa8b35 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid predicate operand + +lastp x0, p15, p0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. 
+// CHECK-NEXT: lastp x0, p15, p0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +lastp x0, p15.b, p0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: lastp x0, p15.b, p0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +lastp x0, p15.q, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: lastp x0, p15.q, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid register types + +lastp sp, p15, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: lastp sp, p15, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +lastp w0, p15, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: lastp w0, p15, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/lastp.s b/llvm/test/MC/AArch64/SVE2p2/lastp.s new file mode 100644 index 000000000000000..1ffa0a7d1fcc198 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/lastp.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +lastp x0, p0, p0.b // 00100101-00100010-10000000-00000000 +// CHECK-INST: lastp x0, p0, p0.b +// CHECK-ENCODING: [0x00,0x80,0x22,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25228000 + +lastp x23, p11, p13.b // 00100101-00100010-10101101-10110111 +// CHECK-INST: lastp x23, p11, p13.b +// CHECK-ENCODING: [0xb7,0xad,0x22,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2522adb7 + +lastp xzr, p15, p15.b // 00100101-00100010-10111101-11111111 +// CHECK-INST: lastp xzr, p15, p15.b +// CHECK-ENCODING: [0xff,0xbd,0x22,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2522bdff + +lastp x0, p0, p0.h // 00100101-01100010-10000000-00000000 +// CHECK-INST: lastp x0, p0, p0.h +// CHECK-ENCODING: [0x00,0x80,0x62,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25628000 + +lastp x23, p11, p13.h // 00100101-01100010-10101101-10110111 +// CHECK-INST: lastp x23, p11, p13.h +// CHECK-ENCODING: [0xb7,0xad,0x62,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2562adb7 + +lastp xzr, p15, p15.h // 00100101-01100010-10111101-11111111 +// CHECK-INST: lastp xzr, p15, p15.h +// CHECK-ENCODING: [0xff,0xbd,0x62,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2562bdff + +lastp x0, p0, p0.s // 00100101-10100010-10000000-00000000 +// CHECK-INST: lastp x0, p0, p0.s +// CHECK-ENCODING: [0x00,0x80,0xa2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a28000 + +lastp x23, p11, p13.s // 00100101-10100010-10101101-10110111 +// CHECK-INST: lastp x23, p11, p13.s +// CHECK-ENCODING: [0xb7,0xad,0xa2,0x25] +// 
CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a2adb7 + +lastp xzr, p15, p15.s // 00100101-10100010-10111101-11111111 +// CHECK-INST: lastp xzr, p15, p15.s +// CHECK-ENCODING: [0xff,0xbd,0xa2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a2bdff + +lastp x0, p0, p0.d // 00100101-11100010-10000000-00000000 +// CHECK-INST: lastp x0, p0, p0.d +// CHECK-ENCODING: [0x00,0x80,0xe2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e28000 + +lastp x23, p11, p13.d // 00100101-11100010-10101101-10110111 +// CHECK-INST: lastp x23, p11, p13.d +// CHECK-ENCODING: [0xb7,0xad,0xe2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e2adb7 + +lastp xzr, p15, p15.d // 00100101-11100010-10111101-11111111 +// CHECK-INST: lastp xzr, p15, p15.d +// CHECK-ENCODING: [0xff,0xbd,0xe2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e2bdff \ No newline at end of file From ea050ab1a99547294e195064bd90ca9822d292cf Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 30 Oct 2024 21:36:39 +0900 Subject: [PATCH 425/425] [mlir][Transforms][NFC] Dialect conversion: Reformat materialization error message (#114176) This commit changes the format of the materialization error message. Previously: `failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion` Now: `failed to legalize unresolved materialization from ('f64') to ('f32') that remained live after conversion` This commit is in preparation of merging the 1:1 and 1:N dialect conversions. At that point, target materializations may create more than one SSA value. I am sending this change as a separate PR to keep the main PR smaller. 
--- mlir/lib/Transforms/Utils/DialectConversion.cpp | 10 +++++----- .../Bufferization/Transforms/finalizing-bufferize.mlir | 2 +- .../Transforms/test-legalize-erased-op-with-uses.mlir | 2 +- .../test/Transforms/test-legalize-type-conversion.mlir | 10 +++++----- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 44cf8331d55a733..0a62628b9ad2407 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -2457,11 +2457,11 @@ legalizeUnresolvedMaterialization(RewriterBase &rewriter, } } - InFlightDiagnostic diag = op->emitError() - << "failed to legalize unresolved materialization " - "from (" - << inputOperands.getTypes() << ") to " << outputType - << " that remained live after conversion"; + InFlightDiagnostic diag = + op->emitError() << "failed to legalize unresolved materialization " + "from (" + << inputOperands.getTypes() << ") to (" << outputType + << ") that remained live after conversion"; diag.attachNote(op->getUsers().begin()->getLoc()) << "see existing live user here: " << *op->getUsers().begin(); return failure(); diff --git a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir index ab18ce05e355d3e..bae94c1be4da908 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir @@ -78,7 +78,7 @@ func.func @static_layout_to_no_layout_cast(%m: memref) -> memref> { %0 = bufferization.to_tensor %m : memref - // expected-error @+1 {{failed to legalize unresolved materialization from ('memref') to 'memref>' that remained live after conversion}} + // expected-error @+1 {{failed to legalize unresolved materialization from ('memref') to ('memref>') that remained live after conversion}} %1 = bufferization.to_memref 
%0 : memref> // expected-note @below{{see existing live user here}} return %1 : memref> diff --git a/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir b/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir index 6e8f0162e505d06..031442b0ee2daf0 100644 --- a/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir +++ b/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir @@ -3,7 +3,7 @@ // Test that an error is emitted when an operation is marked as "erased", but // has users that live across the conversion. func.func @remove_all_ops(%arg0: i32) -> i32 { - // expected-error@below {{failed to legalize unresolved materialization from () to 'i32' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from () to ('i32') that remained live after conversion}} %0 = "test.illegal_op_a"() : () -> i32 // expected-note@below {{see existing live user here}} return %0 : i32 diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir index f130adff42f8cdd..db8bd0f6378d29b 100644 --- a/mlir/test/Transforms/test-legalize-type-conversion.mlir +++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir @@ -2,7 +2,7 @@ func.func @test_invalid_arg_materialization( - // expected-error@below {{failed to legalize unresolved materialization from () to 'i16' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from () to ('i16') that remained live after conversion}} %arg0: i16) { // expected-note@below{{see existing live user here}} "foo.return"(%arg0) : (i16) -> () @@ -21,7 +21,7 @@ func.func @test_valid_arg_materialization(%arg0: i64) { // ----- func.func @test_invalid_result_materialization() { - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}} + // expected-error@below {{failed to legalize 
unresolved materialization from ('f64') to ('f16') that remained live after conversion}} %result = "test.type_producer"() : () -> f16 // expected-note@below{{see existing live user here}} "foo.return"(%result) : (f16) -> () @@ -30,7 +30,7 @@ func.func @test_invalid_result_materialization() { // ----- func.func @test_invalid_result_materialization() { - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f16') that remained live after conversion}} %result = "test.type_producer"() : () -> f16 // expected-note@below{{see existing live user here}} "foo.return"(%result) : (f16) -> () @@ -50,7 +50,7 @@ func.func @test_transitive_use_materialization() { // ----- func.func @test_transitive_use_invalid_materialization() { - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f16') that remained live after conversion}} %result = "test.another_type_producer"() : () -> f16 // expected-note@below{{see existing live user here}} "foo.return"(%result) : (f16) -> () @@ -102,7 +102,7 @@ func.func @test_block_argument_not_converted() { // Make sure argument type changes aren't implicitly forwarded. func.func @test_signature_conversion_no_converter() { "test.signature_conversion_no_converter"() ({ - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f32') that remained live after conversion}} ^bb0(%arg0: f32): "test.type_consumer"(%arg0) : (f32) -> () // expected-note@below{{see existing live user here}}